commit 536b475d5b4f533262d8a3072456f0b7ddbc6254 Author: Krishna Ayyalasomayajula Date: Tue Jan 27 14:28:10 2026 -0600 should work if we compile it together on the AMD machine in the cloud diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec8e6b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +# OS junk +.DS_Store +Thumbs.db + +# Editor settings +.vscode/** +.idea/** +*.swp +*.swo + +# Build system metadata (keep actual build dirs tracked) +CMakeFiles/** +CMakeCache.txt +cmake_install.cmake +Makefile +compile_commands.json + +# Logs and temp files +*.log +*.tmp +*.bak +*.old + +build/** +*.toml +.cache/** + + +**/*.aux +**/*.fdb_latexmk +**/*.fls +**/*.log +**/*.synctex.gz +**/*.blg +**/*.bcf +**/*.run.xml +**/*.bbl diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..6c66587 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/spdlog"] + path = external/spdlog + url = https://github.com/gabime/spdlog.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..9cea092 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,98 @@ +cmake_minimum_required(VERSION 3.20) + +# ----------------------------- +# Force Clang before project() +# ----------------------------- +if(NOT DEFINED CMAKE_C_COMPILER) + set(CMAKE_C_COMPILER clang CACHE STRING "" FORCE) +endif() + +if(NOT DEFINED CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER clang++ CACHE STRING "" FORCE) +endif() + +# ----------------------------- +# Project setup +# ----------------------------- +project(learning-hip LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_BUILD_TYPE Release) + +# ----------------------------- +# System tooling: pkg-config (no module is looked up with it yet) +# ----------------------------- +find_package(PkgConfig REQUIRED) + + +# ----------------------------- +# Dependencies (vendored) +# ----------------------------- +add_subdirectory(external/spdlog) + + +#
----------------------------- +# Executable +# ----------------------------- +add_executable(${PROJECT_NAME} src/main.cpp) + +# ----------------------------- +# Auto-discover include dirs under src/ +# ----------------------------- +file(GLOB_RECURSE SRC_SUBDIRS LIST_DIRECTORIES true + "${CMAKE_CURRENT_SOURCE_DIR}/src/*" +) + +set(SRC_INCLUDES "") +foreach(dir ${SRC_SUBDIRS}) + if(IS_DIRECTORY ${dir}) + list(APPEND SRC_INCLUDES ${dir}) + endif() +endforeach() + +# ----------------------------- +# Auto-discover all .cpp except main.cpp +# ----------------------------- +file(GLOB_RECURSE ALL_CPP + "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" +) + +list(REMOVE_ITEM ALL_CPP + "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp" +) + +# ----------------------------- +# Apply sources and includes +# ----------------------------- +target_compile_definitions(learning-hip PRIVATE __HIP_PLATFORM_AMD__=1) +target_sources(${PROJECT_NAME} PRIVATE ${ALL_CPP}) +target_include_directories(${PROJECT_NAME} PRIVATE + ${SRC_INCLUDES} + ${CMAKE_CURRENT_SOURCE_DIR}/external/hip # vendored HIP headers +) + +# ----------------------------- +# Link libraries +# ----------------------------- +target_link_libraries(${PROJECT_NAME} PRIVATE + spdlog::spdlog + amdhip64 # HIP runtime only +) + +# ----------------------------- +# Compiler warnings (Clang / GCC) +# ----------------------------- +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") + target_compile_options(${PROJECT_NAME} PRIVATE + -Wall + -Wextra + -Wpedantic + -Wconversion + -Wshadow + -Wnull-dereference + -Wdouble-promotion + ) +endif() + diff --git a/external/hip/channel_descriptor.h b/external/hip/channel_descriptor.h new file mode 100644 index 0000000..21d5f20 --- /dev/null +++ b/external/hip/channel_descriptor.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/device_functions.h b/external/hip/device_functions.h new file mode 100644 index 0000000..8c1ba57 --- /dev/null +++ b/external/hip/device_functions.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/driver_types.h b/external/hip/driver_types.h new file mode 100644 index 0000000..1b64d16 --- /dev/null +++ b/external/hip/driver_types.h @@ -0,0 +1,681 @@ +/* +Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_DRIVER_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#if __cplusplus +#include +#else +#include // size_t +#endif +#endif + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "driver_types.h" +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup DriverTypes Driver Types + * @{ + * This section describes the driver data types. 
+ * + */ + +typedef void* hipDeviceptr_t; +/** + * HIP channel format kinds + */ +typedef enum hipChannelFormatKind { + hipChannelFormatKindSigned = 0, ///< Signed channel format + hipChannelFormatKindUnsigned = 1, ///< Unsigned channel format + hipChannelFormatKindFloat = 2, ///< Float channel format + hipChannelFormatKindNone = 3 ///< No channel format +} hipChannelFormatKind; +/** + * HIP channel format descriptor + */ +typedef struct hipChannelFormatDesc { + int x; + int y; + int z; + int w; + enum hipChannelFormatKind f; ///< Channel format kind +} hipChannelFormatDesc; +/** @brief The hipTexRefSetArray function flags parameter override format value*/ +#define HIP_TRSA_OVERRIDE_FORMAT 0x01 +/** @brief The hipTexRefSetFlags function flags parameter read as integer value*/ +#define HIP_TRSF_READ_AS_INTEGER 0x01 +/** @brief The hipTexRefSetFlags function flags parameter normalized coordinate value*/ +#define HIP_TRSF_NORMALIZED_COORDINATES 0x02 +/** @brief The hipTexRefSetFlags function flags parameter srgb value*/ +#define HIP_TRSF_SRGB 0x10 + +typedef struct hipArray* hipArray_t; +typedef const struct hipArray* hipArray_const_t; +/** + * HIP array format + */ +typedef enum hipArray_Format { + HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit array format + HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit array format + HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit array format + HIP_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit array format + HIP_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit array format + HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit array format + HIP_AD_FORMAT_HALF = 0x10, ///< Half array format + HIP_AD_FORMAT_FLOAT = 0x20 ///< Float array format +} hipArray_Format; +/** + * HIP array descriptor + */ +typedef struct HIP_ARRAY_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int 
NumChannels; ///< Number of channels of the array +} HIP_ARRAY_DESCRIPTOR; + +/** + * HIP 3D array descriptor + */ +typedef struct HIP_ARRAY3D_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + size_t Depth; ///< Depth of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int NumChannels; ///< Number of channels of the array + unsigned int Flags; ///< Flags of the array +} HIP_ARRAY3D_DESCRIPTOR; +#if !defined(__HIPCC_RTC__) +/** + * HIP 2D memory copy parameters + */ +typedef struct hip_Memcpy2D { + size_t srcXInBytes; ///< Source width in bytes + size_t srcY; ///< Source height + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t dstXInBytes; ///< Destination width in bytes + size_t dstY; ///< Destination height + hipMemoryType dstMemoryType; ///< Destination memory type + void* dstHost; ///< Destination pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t WidthInBytes; ///< Width in bytes of the 2D memory copy + size_t Height; ///< Height of the 2D memory copy +} hip_Memcpy2D; +#endif // !defined(__HIPCC_RTC__) +/** + * HIP mipmapped array + */ +typedef struct hipMipmappedArray { + void* data; ///< Data pointer of the mipmapped array + struct hipChannelFormatDesc desc; ///< Description of the mipmapped array + unsigned int type; ///< Type of the mipmapped array + unsigned int width; ///< Width of the mipmapped array + unsigned int height; ///< Height of the mipmapped array + unsigned int depth; ///< Depth of the mipmapped array + unsigned int min_mipmap_level; ///< Minimum level of the mipmapped array + unsigned int max_mipmap_level; ///< Maximum level of the mipmapped array + unsigned int flags; ///< Flags of the 
mipmapped array + enum hipArray_Format format; ///< Format of the mipmapped array + unsigned int num_channels; ///< Number of channels of the mipmapped array +} hipMipmappedArray; +/** + * HIP mipmapped array pointer + */ +typedef struct hipMipmappedArray* hipMipmappedArray_t; +typedef hipMipmappedArray_t hipmipmappedArray; +typedef const struct hipMipmappedArray* hipMipmappedArray_const_t; +/** + * HIP resource types + */ +typedef enum hipResourceType { + hipResourceTypeArray = 0x00, ///< Array resource + hipResourceTypeMipmappedArray = 0x01, ///< Mipmapped array resource + hipResourceTypeLinear = 0x02, ///< Linear resource + hipResourceTypePitch2D = 0x03 ///< Pitch 2D resource +} hipResourceType; +typedef enum HIPresourcetype_enum { + HIP_RESOURCE_TYPE_ARRAY = 0x00, ///< Array resource + HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, ///< Mipmapped array resource + HIP_RESOURCE_TYPE_LINEAR = 0x02, ///< Linear resource + HIP_RESOURCE_TYPE_PITCH2D = 0x03 ///< Pitch 2D resource +} HIPresourcetype, + hipResourcetype; +/** + * HIP texture address modes + */ +typedef enum HIPaddress_mode_enum { + HIP_TR_ADDRESS_MODE_WRAP = 0, ///< Wrap address mode + HIP_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp address mode + HIP_TR_ADDRESS_MODE_MIRROR = 2, ///< Mirror address mode + HIP_TR_ADDRESS_MODE_BORDER = 3 ///< Border address mode +} HIPaddress_mode; +/** + * HIP filter modes + */ +typedef enum HIPfilter_mode_enum { + HIP_TR_FILTER_MODE_POINT = 0, ///< Filter mode point + HIP_TR_FILTER_MODE_LINEAR = 1 ///< Filter mode linear +} HIPfilter_mode; +/** + * HIP texture descriptor + */ +typedef struct HIP_TEXTURE_DESC_st { + HIPaddress_mode addressMode[3]; ///< Address modes + HIPfilter_mode filterMode; ///< Filter mode + unsigned int flags; ///< Flags + unsigned int maxAnisotropy; ///< Maximum anisotropy ratio + HIPfilter_mode mipmapFilterMode; ///< Mipmap filter mode + float mipmapLevelBias; ///< Mipmap level bias + float minMipmapLevelClamp; ///< Mipmap minimum level clamp + float 
maxMipmapLevelClamp; ///< Mipmap maximum level clamp + float borderColor[4]; ///< Border Color + int reserved[12]; +} HIP_TEXTURE_DESC; +/** + * HIP texture resource view formats + */ +typedef enum hipResourceViewFormat { + hipResViewFormatNone = 0x00, ///< No resource view format (use underlying resource format) + hipResViewFormatUnsignedChar1 = 0x01, ///< 1 channel, unsigned 8-bit integers + hipResViewFormatUnsignedChar2 = 0x02, ///< 2 channels, unsigned 8-bit integers + hipResViewFormatUnsignedChar4 = 0x03, ///< 4 channels, unsigned 8-bit integers + hipResViewFormatSignedChar1 = 0x04, ///< 1 channel, signed 8-bit integers + hipResViewFormatSignedChar2 = 0x05, ///< 2 channels, signed 8-bit integers + hipResViewFormatSignedChar4 = 0x06, ///< 4 channels, signed 8-bit integers + hipResViewFormatUnsignedShort1 = 0x07, ///< 1 channel, unsigned 16-bit integers + hipResViewFormatUnsignedShort2 = 0x08, ///< 2 channels, unsigned 16-bit integers + hipResViewFormatUnsignedShort4 = 0x09, ///< 4 channels, unsigned 16-bit integers + hipResViewFormatSignedShort1 = 0x0a, ///< 1 channel, signed 16-bit integers + hipResViewFormatSignedShort2 = 0x0b, ///< 2 channels, signed 16-bit integers + hipResViewFormatSignedShort4 = 0x0c, ///< 4 channels, signed 16-bit integers + hipResViewFormatUnsignedInt1 = 0x0d, ///< 1 channel, unsigned 32-bit integers + hipResViewFormatUnsignedInt2 = 0x0e, ///< 2 channels, unsigned 32-bit integers + hipResViewFormatUnsignedInt4 = 0x0f, ///< 4 channels, unsigned 32-bit integers + hipResViewFormatSignedInt1 = 0x10, ///< 1 channel, signed 32-bit integers + hipResViewFormatSignedInt2 = 0x11, ///< 2 channels, signed 32-bit integers + hipResViewFormatSignedInt4 = 0x12, ///< 4 channels, signed 32-bit integers + hipResViewFormatHalf1 = 0x13, ///< 1 channel, 16-bit floating point + hipResViewFormatHalf2 = 0x14, ///< 2 channels, 16-bit floating point + hipResViewFormatHalf4 = 0x15, ///< 4 channels, 16-bit floating point + hipResViewFormatFloat1 = 0x16, ///< 1 
channel, 32-bit floating point + hipResViewFormatFloat2 = 0x17, ///< 2 channels, 32-bit floating point + hipResViewFormatFloat4 = 0x18, ///< 4 channels, 32-bit floating point + hipResViewFormatUnsignedBlockCompressed1 = 0x19, ///< Block-compressed 1 + hipResViewFormatUnsignedBlockCompressed2 = 0x1a, ///< Block-compressed 2 + hipResViewFormatUnsignedBlockCompressed3 = 0x1b, ///< Block-compressed 3 + hipResViewFormatUnsignedBlockCompressed4 = 0x1c, ///< Block-compressed 4 unsigned + hipResViewFormatSignedBlockCompressed4 = 0x1d, ///< Block-compressed 4 signed + hipResViewFormatUnsignedBlockCompressed5 = 0x1e, ///< Block-compressed 5 unsigned + hipResViewFormatSignedBlockCompressed5 = 0x1f, ///< Block-compressed 5 signed + hipResViewFormatUnsignedBlockCompressed6H = 0x20, ///< Block-compressed 6 unsigned half-float + hipResViewFormatSignedBlockCompressed6H = 0x21, ///< Block-compressed 6 signed half-float + hipResViewFormatUnsignedBlockCompressed7 = 0x22 ///< Block-compressed 7 +} hipResourceViewFormat; +/** + * HIP texture resource view formats + */ +typedef enum HIPresourceViewFormat_enum { + HIP_RES_VIEW_FORMAT_NONE = 0x00, ///< No resource view format (use underlying resource format) + HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, ///< 1 channel, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, ///< 2 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, ///< 4 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, ///< 1 channel, signed 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, ///< 2 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, ///< 4 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, ///< 1 channel, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, ///< 2 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, ///< 4 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, ///< 1 channel, signed 16-bit integers 
+ HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, ///< 2 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, ///< 4 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, ///< 1 channel, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, ///< 2 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, ///< 4 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, ///< 1 channel, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, ///< 2 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, ///< 4 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, ///< 1 channel, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, ///< 2 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, ///< 4 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, ///< 1 channel, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, ///< 2 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, ///< 4 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, ///< Block-compressed 1 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, ///< Block-compressed 2 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, ///< Block-compressed 3 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, ///< Block-compressed 4 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, ///< Block-compressed 4 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, ///< Block-compressed 5 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, ///< Block-compressed 5 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, ///< Block-compressed 6 unsigned half-float + HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, ///< Block-compressed 6 signed half-float + HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 ///< Block-compressed 7 +} HIPresourceViewFormat; +/** + * HIP resource descriptor + */ +typedef struct hipResourceDesc { + enum hipResourceType resType; ///< 
Resource type + union { + struct { + hipArray_t array; ///< HIP array + } array; + struct { + hipMipmappedArray_t mipmap; ///< HIP mipmapped array + } mipmap; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + } res; +} hipResourceDesc; + +/** + * HIP resource descriptor struct + */ +typedef struct HIP_RESOURCE_DESC_st { + HIPresourcetype resType; ///< Resource type + union { + struct { + hipArray_t hArray; ///< HIP array + } array; + struct { + hipMipmappedArray_t hMipmappedArray; ///< HIP mipmapped array + } mipmap; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + struct { + int reserved[32]; + } reserved; + } res; + unsigned int flags; ///< Flags (must be zero) +} HIP_RESOURCE_DESC; +/** + * HIP resource view descriptor + */ +struct hipResourceViewDesc { + enum hipResourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel;
///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index +}; +/** + * Resource view descriptor + */ +typedef struct HIP_RESOURCE_VIEW_DESC_st { + HIPresourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel; ///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index + unsigned int reserved[16]; +} HIP_RESOURCE_VIEW_DESC; +/** + * Memory copy types + */ +#if !defined(__HIPCC_RTC__) +typedef enum hipMemcpyKind { + hipMemcpyHostToHost = 0, ///< Host-to-Host Copy + hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy + hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy + hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy + hipMemcpyDefault = 4, ///< Runtime will automatically determine + ///< copy-kind based on virtual addresses. 
+ hipMemcpyDeviceToDeviceNoCU = 1024 ///< Device-to-Device Copy without using compute units +} hipMemcpyKind; +/** + * HIP pitched pointer + */ +typedef struct hipPitchedPtr { + void* ptr; ///< Pointer to the allocated memory + size_t pitch; ///< Pitch in bytes + size_t xsize; ///< Logical size of the first dimension of allocation in elements + size_t ysize; ///< Logical size of the second dimension of allocation in elements +} hipPitchedPtr; +/** + * HIP extent + */ +typedef struct hipExtent { + size_t width; // Width in elements when referring to array memory, in bytes when referring to + // linear memory + size_t height; + size_t depth; +} hipExtent; +/** + * HIP position + */ +typedef struct hipPos { + size_t x; ///< X coordinate + size_t y; ///< Y coordinate + size_t z; ///< Z coordinate +} hipPos; +/** + * HIP 3D memory copy parameters + */ +typedef struct hipMemcpy3DParms { + hipArray_t srcArray; ///< Source array + struct hipPos srcPos; ///< Source position + struct hipPitchedPtr srcPtr; ///< Source pointer + hipArray_t dstArray; ///< Destination array + struct hipPos dstPos; ///< Destination position + struct hipPitchedPtr dstPtr; ///< Destination pointer + struct hipExtent extent; ///< Extent of 3D memory copy + enum hipMemcpyKind kind; ///< Kind of 3D memory copy +} hipMemcpy3DParms; +/** + * HIP 3D memory copy + */ +typedef struct HIP_MEMCPY3D { + size_t srcXInBytes; ///< Source X in bytes + size_t srcY; ///< Source Y + size_t srcZ; ///< Source Z + size_t srcLOD; ///< Source LOD + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source host pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t srcHeight; ///< Source height + size_t dstXInBytes; ///< Destination X in bytes + size_t dstY; ///< Destination Y + size_t dstZ; ///< Destination Z + size_t dstLOD; ///< Destination LOD + hipMemoryType dstMemoryType; ///< Destination memory type +
void* dstHost; ///< Destination host pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t dstHeight; ///< Destination height + size_t WidthInBytes; ///< Width in bytes of 3D memory copy + size_t Height; ///< Height in bytes of 3D memory copy + size_t Depth; ///< Depth in bytes of 3D memory copy +} HIP_MEMCPY3D; +/** + * Specifies the type of location + */ +typedef enum hipMemLocationType { + hipMemLocationTypeInvalid = 0, + hipMemLocationTypeNone = 0, + hipMemLocationTypeDevice = 1, ///< Device location, thus it's HIP device ID + hipMemLocationTypeHost = 2, ///< Host location, id is ignored + hipMemLocationTypeHostNuma = 3, ///< Host NUMA node location, id is host NUMA node id + hipMemLocationTypeHostNumaCurrent = + 4 ///< Host NUMA node closest to current thread’s CPU, id is ignored +} hipMemLocationType; +/** + * Specifies a memory location. + * + * To specify a gpu, set type = @p hipMemLocationTypeDevice and set id = the gpu's device ID + */ +typedef struct hipMemLocation { + hipMemLocationType type; ///< Specifies the location type, which describes the meaning of id + int id; ///< Identifier for the provided location type @p hipMemLocationType +} hipMemLocation; + +/** + * Flags to specify for copies within a batch. Used with hipMemcpyBatchAsync + */ +typedef enum hipMemcpyFlags { + hipMemcpyFlagDefault = 0x0, ///< Default flag + hipMemcpyFlagPreferOverlapWithCompute = 0x1 ///< Tries to overlap copy with compute work. +} hipMemcpyFlags; + +/** + * Flags to specify order in which source pointer is accessed by Batch memcpy + */ +typedef enum hipMemcpySrcAccessOrder { + hipMemcpySrcAccessOrderInvalid = 0x0, ///< Default Invalid. + hipMemcpySrcAccessOrderStream = 0x1, ///< Access to source pointer must be in stream order. 
+ hipMemcpySrcAccessOrderDuringApiCall = + 0x2, ///< Access to source pointer can be out of stream order and all accesses must be + ///< complete before API call returns. + hipMemcpySrcAccessOrderAny = + 0x3, ///< Access to the source pointer can be out of stream order and the accesses can happen + ///< even after the API call returns. + hipMemcpySrcAccessOrderMax = 0x7FFFFFFF +} hipMemcpySrcAccessOrder; + +/** + * Attributes for copies within a batch. + */ +typedef struct hipMemcpyAttributes { + hipMemcpySrcAccessOrder + srcAccessOrder; ///< Source access ordering to be observed for copies with this attribute. + hipMemLocation srcLocHint; ///< Location hint for src operand. + hipMemLocation dstLocHint; ///< Location hint for destination operand. + unsigned int flags; ///< Additional Flags for copies. See hipMemcpyFlags. +} hipMemcpyAttributes; +/** + * Operand types for individual copies within a batch + */ +typedef enum hipMemcpy3DOperandType { + hipMemcpyOperandTypePointer = 0x1, ///< Memcpy operand is a valid pointer. + hipMemcpyOperandTypeArray = 0x2, ///< Memcpy operand is a valid hipArray. + hipMemcpyOperandTypeMax = 0x7FFFFFFF +} hipMemcpy3DOperandType; + +/** + * Struct representing offset into a hipArray_t in elements. + */ +typedef struct hipOffset3D { + size_t x; + size_t y; + size_t z; +} hipOffset3D; +/** + * Struct representing an operand for copy with hipMemcpy3DBatchAsync. + */ +typedef struct hipMemcpy3DOperand { + hipMemcpy3DOperandType type; + union { + struct { + void* ptr; + size_t rowLength; ///< Length of each row in elements. + size_t layerHeight; ///< Height of each layer in elements. + hipMemLocation locHint; ///< Location Hint for the operand. + } ptr; + struct { + hipArray_t array; ///< Array struct for hipMemcpyOperandTypeArray. + hipOffset3D offset; ///< Offset into array in elements.
+ } array; + } op; +} hipMemcpy3DOperand; + +/** + * HIP 3D Batch Op + */ +typedef struct hipMemcpy3DBatchOp { + hipMemcpy3DOperand src; + hipMemcpy3DOperand dst; + hipExtent extent; + hipMemcpySrcAccessOrder srcAccessOrder; + unsigned int flags; +} hipMemcpy3DBatchOp; + +typedef struct hipMemcpy3DPeerParms { + hipArray_t srcArray; ///< Source memory address + hipPos srcPos; ///< Source position offset + hipPitchedPtr srcPtr; ///< Pitched source memory address + int srcDevice; ///< Source device + hipArray_t dstArray; ///< Destination memory address + hipPos dstPos; ///< Destination position offset + hipPitchedPtr dstPtr; ///< Pitched destination memory address + int dstDevice; ///< Destination device + hipExtent extent; ///< Requested memory copy size +} hipMemcpy3DPeerParms; + +/** + * @brief Make hipPitchedPtr + * + * @param [in] d Pointer to the allocated memory + * @param [in] p Pitch in bytes + * @param [in] xsz Logical size of the first dimension of allocation in elements + * @param [in] ysz Logical size of the second dimension of allocation in elements + * + * @returns The created hipPitchedPtr + */ +static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) { + struct hipPitchedPtr s; + s.ptr = d; + s.pitch = p; + s.xsize = xsz; + s.ysize = ysz; + return s; +} +/** + * @brief Make hipPos struct + * + * @param [in] x X coordinate of the new hipPos + * @param [in] y Y coordinate of the new hipPos + * @param [in] z Z coordinate of the new hipPos + * + * @returns The created hipPos struct + */ +static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) { + struct hipPos p; + p.x = x; + p.y = y; + p.z = z; + return p; +} +/** + * @brief Make hipExtent struct + * + * @param [in] w Width of the new hipExtent + * @param [in] h Height of the new hipExtent + * @param [in] d Depth of the new hipExtent + * + * @returns The created hipExtent struct + */ +static inline struct hipExtent make_hipExtent(size_t w, size_t h, 
size_t d) { + struct hipExtent e; + e.width = w; + e.height = h; + e.depth = d; + return e; +} +typedef enum hipFunction_attribute { + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, ///< The maximum number of threads per block. Depends + ///< on function and device. + HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, ///< The statically allocated shared memory size in bytes + ///< per block required by the function. + HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, ///< The user-allocated constant memory by the function in + ///< bytes. + HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, ///< The local memory usage of each thread by this function + ///< in bytes. + HIP_FUNC_ATTRIBUTE_NUM_REGS, ///< The number of registers used by each thread of this function. + HIP_FUNC_ATTRIBUTE_PTX_VERSION, ///< PTX version + HIP_FUNC_ATTRIBUTE_BINARY_VERSION, ///< Binary version + HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, ///< Cache mode + HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, ///< The maximum dynamic shared memory per + ///< block for this function in bytes. + HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference + ///< in percent of the maximum shared + ///< memory. 
+ HIP_FUNC_ATTRIBUTE_MAX +} hipFunction_attribute; + +typedef enum hipPointer_attribute { + HIP_POINTER_ATTRIBUTE_CONTEXT = 1, ///< The context on which a pointer was allocated + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, ///< memory type describing the location of a pointer + HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ///< address at which the pointer is allocated on the + ///< device + HIP_POINTER_ATTRIBUTE_HOST_POINTER, ///< address at which the pointer is allocated on the host + HIP_POINTER_ATTRIBUTE_P2P_TOKENS, ///< A pair of tokens for use with Linux kernel interface + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, ///< Synchronize every synchronous memory operation + ///< initiated on this region + HIP_POINTER_ATTRIBUTE_BUFFER_ID, ///< Unique ID for an allocated memory region + HIP_POINTER_ATTRIBUTE_IS_MANAGED, ///< Indicates if the pointer points to managed memory + HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL, ///< device ordinal of a device on which a pointer + ///< was allocated or registered + HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation + ///< that is suitable for hipIpcGetMemHandle + ///< @warning This attribute is not supported in + ///< HIP + HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR, ///< Starting address for this requested pointer + HIP_POINTER_ATTRIBUTE_RANGE_SIZE, ///< Size of the address range for this requested pointer + HIP_POINTER_ATTRIBUTE_MAPPED, ///< tells if this pointer is in a valid address range + ///< that is mapped to a backing allocation + HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES, ///< Bitmask of allowed hipmemAllocationHandleType + ///< for this allocation @warning This attribute is + ///< not supported in HIP + HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by + ///< this pointer can be used with the + ///< GPUDirect RDMA API + ///< @warning This attribute is not 
supported + ///< in HIP + HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS, ///< Returns the access flags the device associated with + ///< for the corresponding memory referenced by the ptr + HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE ///< Returns the mempool handle for the allocation if + ///< it was allocated from a mempool + ///< @warning This attribute is not supported in HIP +} hipPointer_attribute; + +// doxygen end DriverTypes +/** + * @} + */ + +#endif // !defined(__HIPCC_RTC__) +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +#endif diff --git a/external/hip/hip/channel_descriptor.h b/external/hip/hip/channel_descriptor.h new file mode 100644 index 0000000..21d5f20 --- /dev/null +++ b/external/hip/hip/channel_descriptor.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_CHANNEL_DESCRIPTOR_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/device_functions.h b/external/hip/hip/device_functions.h new file mode 100644 index 0000000..8c1ba57 --- /dev/null +++ b/external/hip/hip/device_functions.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_DEVICE_FUNCTIONS_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/driver_types.h b/external/hip/hip/driver_types.h new file mode 100644 index 0000000..1b64d16 --- /dev/null +++ b/external/hip/hip/driver_types.h @@ -0,0 +1,681 @@ +/* +Copyright (c) 2015 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H +#define HIP_INCLUDE_HIP_DRIVER_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#if __cplusplus +#include +#else +#include // size_t +#endif +#endif + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "driver_types.h" +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup DriverTypes Driver Types + * @{ + * This section describes the driver data types. + * + */ + +typedef void* hipDeviceptr_t; +/** + * HIP channel format kinds + */ +typedef enum hipChannelFormatKind { + hipChannelFormatKindSigned = 0, ///< Signed channel format + hipChannelFormatKindUnsigned = 1, ///< Unsigned channel format + hipChannelFormatKindFloat = 2, ///< Float channel format + hipChannelFormatKindNone = 3 ///< No channel format +} hipChannelFormatKind; +/** + * HIP channel format descriptor + */ +typedef struct hipChannelFormatDesc { + int x; + int y; + int z; + int w; + enum hipChannelFormatKind f; ///< Channel format kind +} hipChannelFormatDesc; +/** @brief The hipTexRefSetArray function flags parameter override format value*/ +#define HIP_TRSA_OVERRIDE_FORMAT 0x01 +/** @brief The hipTexRefSetFlags function flags parameter read as integer value*/ +#define HIP_TRSF_READ_AS_INTEGER 0x01 +/** @brief The hipTexRefSetFlags function flags parameter normalized coordinate value*/ +#define HIP_TRSF_NORMALIZED_COORDINATES 0x02 +/** @brief The hipTexRefSetFlags function flags parameter srgb value*/ +#define HIP_TRSF_SRGB 0x10 + +typedef struct hipArray* hipArray_t; +typedef const struct hipArray* hipArray_const_t; +/** + * HIP array format + */ +typedef enum hipArray_Format { + HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01, ///< Unsigned 8-bit array format + HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02, ///< Unsigned 16-bit array format + HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03, ///< Unsigned 32-bit array format + HIP_AD_FORMAT_SIGNED_INT8 = 0x08, ///< Signed 8-bit array format + 
HIP_AD_FORMAT_SIGNED_INT16 = 0x09, ///< Signed 16-bit array format + HIP_AD_FORMAT_SIGNED_INT32 = 0x0a, ///< Signed 32-bit array format + HIP_AD_FORMAT_HALF = 0x10, ///< Half array format + HIP_AD_FORMAT_FLOAT = 0x20 ///< Float array format +} hipArray_Format; +/** + * HIP array descriptor + */ +typedef struct HIP_ARRAY_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int NumChannels; ///< Number of channels of the array +} HIP_ARRAY_DESCRIPTOR; + +/** + * HIP 3D array descriptor + */ +typedef struct HIP_ARRAY3D_DESCRIPTOR { + size_t Width; ///< Width of the array + size_t Height; ///< Height of the array + size_t Depth; ///< Depth of the array + enum hipArray_Format Format; ///< Format of the array + unsigned int NumChannels; ///< Number of channels of the array + unsigned int Flags; ///< Flags of the array +} HIP_ARRAY3D_DESCRIPTOR; +#if !defined(__HIPCC_RTC__) +/** + * HIP 2D memory copy parameters + */ +typedef struct hip_Memcpy2D { + size_t srcXInBytes; ///< Source width in bytes + size_t srcY; ///< Source height + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t dstXInBytes; ///< Destination width in bytes + size_t dstY; ///< Destination height + hipMemoryType dstMemoryType; ///< Destination memory type + void* dstHost; ///< Destination pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t WidthInBytes; ///< Width in bytes of the 2D memory copy + size_t Height; ///< Height of the 2D memory copy +} hip_Memcpy2D; +#endif // !defined(__HIPCC_RTC__) +/** + * HIP mipmapped array + */ +typedef struct hipMipmappedArray { + void* data; ///< Data pointer of the mipmapped array + 
struct hipChannelFormatDesc desc; ///< Description of the mipmapped array + unsigned int type; ///< Type of the mipmapped array + unsigned int width; ///< Width of the mipmapped array + unsigned int height; ///< Height of the mipmapped array + unsigned int depth; ///< Depth of the mipmapped array + unsigned int min_mipmap_level; ///< Minimum level of the mipmapped array + unsigned int max_mipmap_level; ///< Maximum level of the mipmapped array + unsigned int flags; ///< Flags of the mipmapped array + enum hipArray_Format format; ///< Format of the mipmapped array + unsigned int num_channels; ///< Number of channels of the mipmapped array +} hipMipmappedArray; +/** + * HIP mipmapped array pointer + */ +typedef struct hipMipmappedArray* hipMipmappedArray_t; +typedef hipMipmappedArray_t hipmipmappedArray; +typedef const struct hipMipmappedArray* hipMipmappedArray_const_t; +/** + * HIP resource types + */ +typedef enum hipResourceType { + hipResourceTypeArray = 0x00, ///< Array resource + hipResourceTypeMipmappedArray = 0x01, ///< Mipmapped array resource + hipResourceTypeLinear = 0x02, ///< Linear resource + hipResourceTypePitch2D = 0x03 ///< Pitch 2D resource +} hipResourceType; +typedef enum HIPresourcetype_enum { + HIP_RESOURCE_TYPE_ARRAY = 0x00, ///< Array resource + HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, ///< Mipmapped array resource + HIP_RESOURCE_TYPE_LINEAR = 0x02, ///< Linear resource + HIP_RESOURCE_TYPE_PITCH2D = 0x03 ///< Pitch 2D resource +} HIPresourcetype, + hipResourcetype; +/** + * HIP texture address modes + */ +typedef enum HIPaddress_mode_enum { + HIP_TR_ADDRESS_MODE_WRAP = 0, ///< Wrap address mode + HIP_TR_ADDRESS_MODE_CLAMP = 1, ///< Clamp address mode + HIP_TR_ADDRESS_MODE_MIRROR = 2, ///< Mirror address mode + HIP_TR_ADDRESS_MODE_BORDER = 3 ///< Border address mode +} HIPaddress_mode; +/** + * HIP filter modes + */ +typedef enum HIPfilter_mode_enum { + HIP_TR_FILTER_MODE_POINT = 0, ///< Filter mode point + HIP_TR_FILTER_MODE_LINEAR = 1 ///< 
Filter mode linear +} HIPfilter_mode; +/** + * HIP texture descriptor + */ +typedef struct HIP_TEXTURE_DESC_st { + HIPaddress_mode addressMode[3]; ///< Address modes + HIPfilter_mode filterMode; ///< Filter mode + unsigned int flags; ///< Flags + unsigned int maxAnisotropy; ///< Maximum anisotropy ratio + HIPfilter_mode mipmapFilterMode; ///< Mipmap filter mode + float mipmapLevelBias; ///< Mipmap level bias + float minMipmapLevelClamp; ///< Mipmap minimum level clamp + float maxMipmapLevelClamp; ///< Mipmap maximum level clamp + float borderColor[4]; ///< Border Color + int reserved[12]; +} HIP_TEXTURE_DESC; +/** + * HIP texture resource view formats + */ +typedef enum hipResourceViewFormat { + hipResViewFormatNone = 0x00, ///< No resource view format (use underlying resource format) + hipResViewFormatUnsignedChar1 = 0x01, ///< 1 channel, unsigned 8-bit integers + hipResViewFormatUnsignedChar2 = 0x02, ///< 2 channels, unsigned 8-bit integers + hipResViewFormatUnsignedChar4 = 0x03, ///< 4 channels, unsigned 8-bit integers + hipResViewFormatSignedChar1 = 0x04, ///< 1 channel, signed 8-bit integers + hipResViewFormatSignedChar2 = 0x05, ///< 2 channels, signed 8-bit integers + hipResViewFormatSignedChar4 = 0x06, ///< 4 channels, signed 8-bit integers + hipResViewFormatUnsignedShort1 = 0x07, ///< 1 channel, unsigned 16-bit integers + hipResViewFormatUnsignedShort2 = 0x08, ///< 2 channels, unsigned 16-bit integers + hipResViewFormatUnsignedShort4 = 0x09, ///< 4 channels, unsigned 16-bit integers + hipResViewFormatSignedShort1 = 0x0a, ///< 1 channel, signed 16-bit integers + hipResViewFormatSignedShort2 = 0x0b, ///< 2 channels, signed 16-bit integers + hipResViewFormatSignedShort4 = 0x0c, ///< 4 channels, signed 16-bit integers + hipResViewFormatUnsignedInt1 = 0x0d, ///< 1 channel, unsigned 32-bit integers + hipResViewFormatUnsignedInt2 = 0x0e, ///< 2 channels, unsigned 32-bit integers + hipResViewFormatUnsignedInt4 = 0x0f, ///< 4 channels, unsigned 32-bit integers + 
hipResViewFormatSignedInt1 = 0x10, ///< 1 channel, signed 32-bit integers + hipResViewFormatSignedInt2 = 0x11, ///< 2 channels, signed 32-bit integers + hipResViewFormatSignedInt4 = 0x12, ///< 4 channels, signed 32-bit integers + hipResViewFormatHalf1 = 0x13, ///< 1 channel, 16-bit floating point + hipResViewFormatHalf2 = 0x14, ///< 2 channels, 16-bit floating point + hipResViewFormatHalf4 = 0x15, ///< 4 channels, 16-bit floating point + hipResViewFormatFloat1 = 0x16, ///< 1 channel, 32-bit floating point + hipResViewFormatFloat2 = 0x17, ///< 2 channels, 32-bit floating point + hipResViewFormatFloat4 = 0x18, ///< 4 channels, 32-bit floating point + hipResViewFormatUnsignedBlockCompressed1 = 0x19, ///< Block-compressed 1 + hipResViewFormatUnsignedBlockCompressed2 = 0x1a, ///< Block-compressed 2 + hipResViewFormatUnsignedBlockCompressed3 = 0x1b, ///< Block-compressed 3 + hipResViewFormatUnsignedBlockCompressed4 = 0x1c, ///< Block-compressed 4 unsigned + hipResViewFormatSignedBlockCompressed4 = 0x1d, ///< Block-compressed 4 signed + hipResViewFormatUnsignedBlockCompressed5 = 0x1e, ///< Block-compressed 5 unsigned + hipResViewFormatSignedBlockCompressed5 = 0x1f, ///< Block-compressed 5 signed + hipResViewFormatUnsignedBlockCompressed6H = 0x20, ///< Block-compressed 6 unsigned half-float + hipResViewFormatSignedBlockCompressed6H = 0x21, ///< Block-compressed 6 signed half-float + hipResViewFormatUnsignedBlockCompressed7 = 0x22 ///< Block-compressed 7 +} hipResourceViewFormat; +/** + * HIP texture resource view formats + */ +typedef enum HIPresourceViewFormat_enum { + HIP_RES_VIEW_FORMAT_NONE = 0x00, ///< No resource view format (use underlying resource format) + HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, ///< 1 channel, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, ///< 2 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, ///< 4 channels, unsigned 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, ///< 1 channel, signed 8-bit integers 
+ HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, ///< 2 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, ///< 4 channels, signed 8-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, ///< 1 channel, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, ///< 2 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, ///< 4 channels, unsigned 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, ///< 1 channel, signed 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, ///< 2 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, ///< 4 channels, signed 16-bit integers + HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, ///< 1 channel, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, ///< 2 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, ///< 4 channels, unsigned 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, ///< 1 channel, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, ///< 2 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, ///< 4 channels, signed 32-bit integers + HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, ///< 1 channel, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, ///< 2 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, ///< 4 channels, 16-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, ///< 1 channel, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, ///< 2 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, ///< 4 channels, 32-bit floating point + HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, ///< Block-compressed 1 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, ///< Block-compressed 2 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, ///< Block-compressed 3 + HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, ///< Block-compressed 4 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, ///< Block-compressed 4 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, ///< 
Block-compressed 5 unsigned + HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, ///< Block-compressed 5 signed + HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, ///< Block-compressed 6 unsigned half-float + HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, ///< Block-compressed 6 signed half-float + HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 ///< Block-compressed 7 +} HIPresourceViewFormat; +/** + * HIP resource descriptor + */ +typedef struct hipResourceDesc { + enum hipResourceType resType; ///< Resource type + union { + struct { + hipArray_t array; ///< HIP array + } array; + struct { + hipMipmappedArray_t mipmap; ///< HIP mipmapped array + } mipmap; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + void* devPtr; ///< Device pointer + struct hipChannelFormatDesc desc; ///< Channel format description + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + } res; +} hipResourceDesc; + +/** + * HIP resource view descriptor struct + */ +typedef struct HIP_RESOURCE_DESC_st { + HIPresourcetype resType; ///< Resource type + union { + struct { + hipArray_t hArray; ///< HIP array + } array; + struct { + hipMipmappedArray_t hMipmappedArray; ///< HIP mipmapped array + } mipmap; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t sizeInBytes; ///< Size in bytes + } linear; + struct { + hipDeviceptr_t devPtr; ///< Device pointer + hipArray_Format format; ///< Array format + unsigned int numChannels; ///< Channels per array element + size_t width; ///< Width of the array in elements + size_t height; ///< Height of the array in elements + size_t pitchInBytes; ///< Pitch between two rows in bytes + } pitch2D; + struct { + int reserved[32]; + 
} reserved; + } res; + unsigned int flags; ///< Flags (must be zero) +} HIP_RESOURCE_DESC; +/** + * HIP resource view descriptor + */ +struct hipResourceViewDesc { + enum hipResourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel; ///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index +}; +/** + * Resource view descriptor + */ +typedef struct HIP_RESOURCE_VIEW_DESC_st { + HIPresourceViewFormat format; ///< Resource view format + size_t width; ///< Width of the resource view + size_t height; ///< Height of the resource view + size_t depth; ///< Depth of the resource view + unsigned int firstMipmapLevel; ///< First defined mipmap level + unsigned int lastMipmapLevel; ///< Last defined mipmap level + unsigned int firstLayer; ///< First layer index + unsigned int lastLayer; ///< Last layer index + unsigned int reserved[16]; +} HIP_RESOURCE_VIEW_DESC; +/** + * Memory copy types + */ +#if !defined(__HIPCC_RTC__) +typedef enum hipMemcpyKind { + hipMemcpyHostToHost = 0, ///< Host-to-Host Copy + hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy + hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy + hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy + hipMemcpyDefault = 4, ///< Runtime will automatically determine + ///< copy-kind based on virtual addresses. 
+ hipMemcpyDeviceToDeviceNoCU = 1024 ///< Device-to-Device Copy without using compute units +} hipMemcpyKind; +/** + * HIP pithed pointer + */ +typedef struct hipPitchedPtr { + void* ptr; ///< Pointer to the allocated memory + size_t pitch; ///< Pitch in bytes + size_t xsize; ///< Logical size of the first dimension of allocation in elements + size_t ysize; ///< Logical size of the second dimension of allocation in elements +} hipPitchedPtr; +/** + * HIP extent + */ +typedef struct hipExtent { + size_t width; // Width in elements when referring to array memory, in bytes when referring to + // linear memory + size_t height; + size_t depth; +} hipExtent; +/** + * HIP position + */ +typedef struct hipPos { + size_t x; ///< X coordinate + size_t y; ///< Y coordinate + size_t z; ///< Z coordinate +} hipPos; +/** + * HIP 3D memory copy parameters + */ +typedef struct hipMemcpy3DParms { + hipArray_t srcArray; ///< Source array + struct hipPos srcPos; ///< Source position + struct hipPitchedPtr srcPtr; ///< Source pointer + hipArray_t dstArray; ///< Destination array + struct hipPos dstPos; ///< Destination position + struct hipPitchedPtr dstPtr; ///< Destination pointer + struct hipExtent extent; ///< Extent of 3D memory copy + enum hipMemcpyKind kind; ///< Kind of 3D memory copy +} hipMemcpy3DParms; +/** + * HIP 3D memory copy + */ +typedef struct HIP_MEMCPY3D { + size_t srcXInBytes; ///< Source X in bytes + size_t srcY; ///< Source Y + size_t srcZ; ///< Source Z + size_t srcLOD; ///< Source LOD + hipMemoryType srcMemoryType; ///< Source memory type + const void* srcHost; ///< Source host pointer + hipDeviceptr_t srcDevice; ///< Source device + hipArray_t srcArray; ///< Source array + size_t srcPitch; ///< Source pitch + size_t srcHeight; ///< Source height + size_t dstXInBytes; ///< Destination X in bytes + size_t dstY; ///< Destination Y + size_t dstZ; ///< Destination Z + size_t dstLOD; ///< Destination LOD + hipMemoryType dstMemoryType; ///< Destination memory type + 
void* dstHost; ///< Destination host pointer + hipDeviceptr_t dstDevice; ///< Destination device + hipArray_t dstArray; ///< Destination array + size_t dstPitch; ///< Destination pitch + size_t dstHeight; ///< Destination height + size_t WidthInBytes; ///< Width in bytes of 3D memory copy + size_t Height; ///< Height in bytes of 3D memory copy + size_t Depth; ///< Depth in bytes of 3D memory copy +} HIP_MEMCPY3D; +/** + * Specifies the type of location + */ +typedef enum hipMemLocationType { + hipMemLocationTypeInvalid = 0, + hipMemLocationTypeNone = 0, + hipMemLocationTypeDevice = 1, ///< Device location, thus it's HIP device ID + hipMemLocationTypeHost = 2, ///< Host location, id is ignored + hipMemLocationTypeHostNuma = 3, ///< Host NUMA node location, id is host NUMA node id + hipMemLocationTypeHostNumaCurrent = + 4 ///< Host NUMA node closest to current thread’s CPU, id is ignored +} hipMemLocationType; +/** + * Specifies a memory location. + * + * To specify a gpu, set type = @p hipMemLocationTypeDevice and set id = the gpu's device ID + */ +typedef struct hipMemLocation { + hipMemLocationType type; ///< Specifies the location type, which describes the meaning of id + int id; ///< Identifier for the provided location type @p hipMemLocationType +} hipMemLocation; + +/** + * Flags to specify for copies within a batch. Used with hipMemcpyBatchAsync + */ +typedef enum hipMemcpyFlags { + hipMemcpyFlagDefault = 0x0, ///< Default flag + hipMemcpyFlagPreferOverlapWithCompute = 0x1 ///< Tries to overlap copy with compute work. +} hipMemcpyFlags; + +/** + * Flags to specify order in which source pointer is accessed by Batch memcpy + */ +typedef enum hipMemcpySrcAccessOrder { + hipMemcpySrcAccessOrderInvalid = 0x0, ///< Default Invalid. + hipMemcpySrcAccessOrderStream = 0x1, ///< Access to source pointer must be in stream order. 
+ hipMemcpySrcAccessOrderDuringApiCall = + 0x2, ///< Access to source pointer can be out of stream order and all accesses must be + ///< complete before API call returns. + hipMemcpySrcAccessOrderAny = + 0x3, ///< Access to the source pointer can be out of stream order and the accesses can happen + ///< even after the API call return. + hipMemcpySrcAccessOrderMax = 0x7FFFFFFF +} hipMemcpySrcAccessOrder; + +/** + * Attributes for copies within a batch. + */ +typedef struct hipMemcpyAttributes { + hipMemcpySrcAccessOrder + srcAccessOrder; ///< Source access ordering to be observed for copies with this attribute. + hipMemLocation srcLocHint; ///< Location hint for src operand. + hipMemLocation dstLocHint; ///< Location hint for destination operand. + unsigned int flags; ///< Additional Flags for copies. See hipMemcpyFlags. +} hipMemcpyAttributes; +/** + * Operand types for individual copies within a batch + */ +typedef enum hipMemcpy3DOperandType { + hipMemcpyOperandTypePointer = 0x1, ///< Mempcy operand is a valid pointer. + hipMemcpyOperandTypeArray = 0x2, ///< Memcpy operand is a valid hipArray. + hipMemcpyOperandTypeMax = 0x7FFFFFFF +} hipMemcpy3DOperandType; + +/** + * Struct representing offset into a hipArray_t in elements. + */ +typedef struct hipOffset3D { + size_t x; + size_t y; + size_t z; +} hipOffset3D; +/** + * Struct representing an operand for copy with hipMemcpy3DBatchAsync. + */ +typedef struct hipMemcpy3DOperand { + hipMemcpy3DOperandType type; + union { + struct { + void* ptr; + size_t rowLength; ///< Length of each row in elements. + size_t layerHeight; ///< Height of each layer in elements. + hipMemLocation locHint; ///< Location Hint for the operand. + } ptr; + struct { + hipArray_t array; ///< Array struct for hipMemcpyOperandTypeArray. + hipOffset3D offset; ///< Offset into array in elements. 
+ } array; + } op; +} hipMemcpy3DOperand; + +/** + * HIP 3D Batch Op + */ +typedef struct hipMemcpy3DBatchOp { + hipMemcpy3DOperand src; + hipMemcpy3DOperand dst; + hipExtent extent; + hipMemcpySrcAccessOrder srcAccessOrder; + unsigned int flags; +} hipMemcpy3DBatchOp; + +typedef struct hipMemcpy3DPeerParms { + hipArray_t srcArray; ///< Source memory address + hipPos srcPos; ///< Source position offset + hipPitchedPtr srcPtr; ///< Pitched source memory address + int srcDevice; ///< Source device + hipArray_t dstArray; ///< Destination memory address + hipPos dstPos; ///< Destination position offset + hipPitchedPtr dstPtr; ///< Pitched destination memory address + int dstDevice; ///< Destination device + hipExtent extent; ///< Requested memory copy size +} hipMemcpy3DPeerParms; + +/** + * @brief Make hipPitchedPtr + * + * @param [in] d Pointer to the allocated memory + * @param [in] p Pitch in bytes + * @param [in] xsz Logical size of the first dimension of allocation in elements + * @param [in] ysz Logical size of the second dimension of allocation in elements + * + * @returns The created hipPitchedPtr + */ +static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) { + struct hipPitchedPtr s; + s.ptr = d; + s.pitch = p; + s.xsize = xsz; + s.ysize = ysz; + return s; +} +/** + * @brief Make hipPos struct + * + * @param [in] x X coordinate of the new hipPos + * @param [in] y Y coordinate of the new hipPos + * @param [in] z Z coordinate of the new hipPos + * + * @returns The created hipPos struct + */ +static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) { + struct hipPos p; + p.x = x; + p.y = y; + p.z = z; + return p; +} +/** + * @brief Make hipExtent struct + * + * @param [in] w Width of the new hipExtent + * @param [in] h Height of the new hipExtent + * @param [in] d Depth of the new hipExtent + * + * @returns The created hipExtent struct + */ +static inline struct hipExtent make_hipExtent(size_t w, size_t h, 
size_t d) { + struct hipExtent e; + e.width = w; + e.height = h; + e.depth = d; + return e; +} +typedef enum hipFunction_attribute { + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, ///< The maximum number of threads per block. Depends + ///< on function and device. + HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, ///< The statically allocated shared memory size in bytes + ///< per block required by the function. + HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, ///< The user-allocated constant memory by the function in + ///< bytes. + HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, ///< The local memory usage of each thread by this function + ///< in bytes. + HIP_FUNC_ATTRIBUTE_NUM_REGS, ///< The number of registers used by each thread of this function. + HIP_FUNC_ATTRIBUTE_PTX_VERSION, ///< PTX version + HIP_FUNC_ATTRIBUTE_BINARY_VERSION, ///< Binary version + HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, ///< Cache mode + HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, ///< The maximum dynamic shared memory per + ///< block for this function in bytes. + HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference + ///< in percent of the maximum shared + ///< memory. 
+ HIP_FUNC_ATTRIBUTE_MAX +} hipFunction_attribute; + +typedef enum hipPointer_attribute { + HIP_POINTER_ATTRIBUTE_CONTEXT = 1, ///< The context on which a pointer was allocated + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, ///< memory type describing the location of a pointer + HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ///< address at which the pointer is allocated on the + ///< device + HIP_POINTER_ATTRIBUTE_HOST_POINTER, ///< address at which the pointer is allocated on the host + HIP_POINTER_ATTRIBUTE_P2P_TOKENS, ///< A pair of tokens for use with Linux kernel interface + ///< @warning This attribute is not supported in HIP + HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, ///< Synchronize every synchronous memory operation + ///< initiated on this region + HIP_POINTER_ATTRIBUTE_BUFFER_ID, ///< Unique ID for an allocated memory region + HIP_POINTER_ATTRIBUTE_IS_MANAGED, ///< Indicates if the pointer points to managed memory + HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL, ///< device ordinal of a device on which a pointer + ///< was allocated or registered + HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation + ///< that is suitable for hipIpcGetMemHandle + ///< @warning This attribute is not supported in + ///< HIP + HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR, ///< Starting address for this requested pointer + HIP_POINTER_ATTRIBUTE_RANGE_SIZE, ///< Size of the address range for this requested pointer + HIP_POINTER_ATTRIBUTE_MAPPED, ///< tells if this pointer is in a valid address range + ///< that is mapped to a backing allocation + HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES, ///< Bitmask of allowed hipmemAllocationHandleType + ///< for this allocation @warning This attribute is + ///< not supported in HIP + HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by + ///< this pointer can be used with the + ///< GPUDirect RDMA API + ///< @warning This attribute is not 
supported + ///< in HIP + HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS, ///< Returns the access flags the device associated with + ///< for the corresponding memory referenced by the ptr + HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE ///< Returns the mempool handle for the allocation if + ///< it was allocated from a mempool + ///< @warning This attribute is not supported in HIP +} hipPointer_attribute; + +// doxygen end DriverTypes +/** + * @} + */ + +#endif // !defined(__HIPCC_RTC__) +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +#endif diff --git a/external/hip/hip/hip_bf16.h b/external/hip/hip/hip_bf16.h new file mode 100644 index 0000000..1783946 --- /dev/null +++ b/external/hip/hip/hip_bf16.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_BF16_H +#define HIP_INCLUDE_HIP_HIP_BF16_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // HIP_INCLUDE_HIP_HIP_BF16_H diff --git a/external/hip/hip/hip_bfloat16.h b/external/hip/hip/hip_bfloat16.h new file mode 100644 index 0000000..fbbfd10 --- /dev/null +++ b/external/hip/hip/hip_bfloat16.h @@ -0,0 +1,44 @@ +/** + * MIT License + * + * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/*!\file + * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef + */ + +#ifndef _HIP_BFLOAT16_H_ +#define _HIP_BFLOAT16_H_ + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#warning "hip_bfloat16.h is not supported on nvidia platform" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // _HIP_BFLOAT16_H_ diff --git a/external/hip/hip/hip_common.h b/external/hip/hip/hip_common.h new file mode 100644 index 0000000..4a7dcff --- /dev/null +++ b/external/hip/hip/hip_common.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H +#define HIP_INCLUDE_HIP_HIP_COMMON_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif +// Common code included at start of every hip file. +// Auto enable __HIP_PLATFORM_AMD__ if compiling on AMD platform +// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly +#if defined(__clang__) && defined(__HIP__) +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ +#endif +#endif // defined(__clang__) && defined(__HIP__) + +// Auto enable __HIP_PLATFORM_NVIDIA__ if compiling with NVIDIA platform +#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) +#ifndef __HIP_PLATFORM_NVIDIA__ +#define __HIP_PLATFORM_NVIDIA__ +#endif + +#ifdef __CUDACC__ +#define __HIPCC__ +#endif + +#endif //__NVCC__ + +// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path +#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ + (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) +#define __HIP_DEVICE_COMPILE__ 1 +#endif + +#ifdef __GNUC__ +#define HIP_PUBLIC_API __attribute__((visibility("default"))) +#define HIP_INTERNAL_EXPORTED_API __attribute__((visibility("default"))) +#else +#define HIP_PUBLIC_API +#define HIP_INTERNAL_EXPORTED_API +#endif + +#if __HIP_DEVICE_COMPILE__ == 0 +// 32-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) + +// 64-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (0) + +// Warp cross-lane operations +#define __HIP_ARCH_HAS_WARP_VOTE__ (0) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (0) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) 
+#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// Sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// Misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (0) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/external/hip/hip/hip_complex.h b/external/hip/hip/hip_complex.h new file mode 100644 index 0000000..66ff0b3 --- /dev/null +++ b/external/hip/hip/hip_complex.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_HIP_COMPLEX_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/hip_cooperative_groups.h b/external/hip/hip/hip_cooperative_groups.h new file mode 100644 index 0000000..6734819 --- /dev/null +++ b/external/hip/hip/hip_cooperative_groups.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hip_cooperative_groups.h + * + * @brief Defines new types and device API wrappers for `Cooperative Group` + * feature. 
+ */ + +#ifndef HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H +#define HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H + +#include +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#if __cplusplus && defined(__clang__) && defined(__HIP__) +#include +#endif +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H diff --git a/external/hip/hip/hip_deprecated.h b/external/hip/hip/hip_deprecated.h new file mode 100644 index 0000000..91c58e2 --- /dev/null +++ b/external/hip/hip/hip_deprecated.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +// This file will add older hip functions used in the versioning system +// Find the deprecated functions and structs in hip_device.cpp + +// This struct is also kept in hip_device.cpp +typedef struct hipDeviceProp_tR0000 { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of shared memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. 
+ hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< DEPRECATED: use gcnArchName instead + char gcnArchName[256]; ///< AMD GCN Arch Name. + int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to + ///< pitched memory + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int ECCEnabled; ///< Device has ECC support enabled + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched functions + int 
cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device + int managedMemory; ///< Device supports allocating managed memory on this system + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables +} hipDeviceProp_tR0000; + + +#ifdef __cplusplus +extern "C" { +#endif + +hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device); +hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* prop); + +#ifdef __cplusplus +} +#endif diff --git a/external/hip/hip/hip_ext.h b/external/hip/hip/hip_ext.h new file mode 100644 index 0000000..7d475be --- /dev/null +++ b/external/hip/hip/hip_ext.h @@ -0,0 +1,162 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_EXT_H +#define HIP_INCLUDE_HIP_HIP_EXT_H +#include "hip/hip_runtime.h" +#if defined(__cplusplus) +#include +#include +#endif +/** @addtogroup Execution Execution Control + * @{ + */ + +/** + * @brief Launches kernel with parameters and shared memory on stream with arguments passed + * to kernel params or extra arguments. + * + * @param [in] f Kernel to launch. + * @param [in] globalWorkSizeX X grid dimension specified in work-items. + * @param [in] globalWorkSizeY Y grid dimension specified in work-items. + * @param [in] globalWorkSizeZ Z grid dimension specified in work-items. + * @param [in] localWorkSizeX X block dimension specified in work-items. + * @param [in] localWorkSizeY Y block dimension specified in work-items. + * @param [in] localWorkSizeZ Z block dimension specified in work-items. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. 
+ * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] hStream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] kernelParams pointer to kernel parameters. + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * All passed arguments must be naturally aligned according to their type. The memory address of + * each argument should be a multiple of its size in bytes. Please refer to + * hip_porting_driver_api.md for sample usage. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue. + * + * HIP/ROCm actually updates the start event when the associated kernel completes. + * Currently, timing between startEvent and stopEvent does not include the time it takes to perform + * a system scope release/cache flush - only the time it takes to issue writes to cache. + * + * @note For this HIP API, the flag 'hipExtAnyOrderLaunch' is not supported on AMD GFX9xx boards. 
+ * + */ +HIP_PUBLIC_API +extern "C" hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent __dparm(NULL), + hipEvent_t stopEvent __dparm(NULL), + uint32_t flags __dparm(0)); +/** + * @brief This HIP API is deprecated, please use hipExtModuleLaunchKernel() instead. + * + */ +HIP_DEPRECATED("use hipExtModuleLaunchKernel instead") +HIP_PUBLIC_API +extern "C" hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent __dparm(NULL), + hipEvent_t stopEvent __dparm(NULL)); + +#if defined(__cplusplus) + +/** + * @brief Launches kernel from the pointer address, with arguments and shared memory on stream. + * + * @param [in] function_address pointer to the Kernel to launch. + * @param [in] numBlocks number of blocks. + * @param [in] dimBlocks dimension of a block. + * @param [in] args pointer to kernel arguments. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. 
The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue. + * + */ +extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags); + +/** + * @brief Launches kernel with dimension parameters and shared memory on stream with templated + * kernel and arguments. + * + * @param [in] kernel Kernel to launch. + * @param [in] numBlocks const number of blocks. + * @param [in] dimBlocks const dimension of a block. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @param [in] args templated kernel arguments. + * + */ +template +inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags, + Args... 
args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast(kernel); + hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent, stopEvent, + (int)flags); +} + +#endif // defined(__cplusplus) + +// doxygen end AMD-specific features +/** + * @} + */ +#endif // #ifndef HIP_INCLUDE_HIP_HIP_EXT_H diff --git a/external/hip/hip/hip_ext_ocp.h b/external/hip/hip/hip_ext_ocp.h new file mode 100644 index 0000000..1037525 --- /dev/null +++ b/external/hip/hip/hip_ext_ocp.h @@ -0,0 +1,31 @@ +/* +Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_EXT_OCP_H +#define HIP_INCLUDE_HIP_HIP_EXT_OCP_H + +#include + +#include +#include + +#endif // HIP_INCLUDE_HIP_HIP_EXT_OCP_H diff --git a/external/hip/hip/hip_fp16.h b/external/hip/hip/hip_fp16.h new file mode 100644 index 0000000..bf60a3b --- /dev/null +++ b/external/hip/hip/hip_fp16.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP16_H +#define HIP_INCLUDE_HIP_HIP_FP16_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#define HIPRT_INF_FP16 CUDART_INF_FP16 +#define HIPRT_MAX_NORMAL_FP16 CUDART_MAX_NORMAL_FP16 +#define HIPRT_MIN_DENORM_FP16 CUDART_MIN_DENORM_FP16 +#define HIPRT_NAN_FP16 CUDART_NAN_FP16 +#define HIPRT_NEG_ZERO_FP16 CUDART_NEG_ZERO_FP16 +#define HIPRT_ONE_FP16 CUDART_ONE_FP16 +#define HIPRT_ZERO_FP16 CUDART_ZERO_FP16 + +#include "cuda_fp16.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/hip_fp4.h b/external/hip/hip/hip_fp4.h new file mode 100644 index 0000000..59fb5da --- /dev/null +++ b/external/hip/hip/hip_fp4.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP4_H +#define HIP_INCLUDE_HIP_HIP_FP4_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP4_H diff --git a/external/hip/hip/hip_fp6.h b/external/hip/hip/hip_fp6.h new file mode 100644 index 0000000..72d642a --- /dev/null +++ b/external/hip/hip/hip_fp6.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP6_H +#define HIP_INCLUDE_HIP_HIP_FP6_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP6_H diff --git a/external/hip/hip/hip_fp8.h b/external/hip/hip/hip_fp8.h new file mode 100644 index 0000000..82f47af --- /dev/null +++ b/external/hip/hip/hip_fp8.h @@ -0,0 +1,33 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP8_H +#define HIP_INCLUDE_HIP_HIP_FP8_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +// We only have fnuz defs for now, which are not supported by other platforms +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP8_H diff --git a/external/hip/hip/hip_gl_interop.h b/external/hip/hip/hip_gl_interop.h new file mode 100644 index 0000000..8af6ec3 --- /dev/null +++ b/external/hip/hip/hip_gl_interop.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIP_GL_INTEROP_H +#define HIP_GL_INTEROP_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/amd_detail/amd_hip_gl_interop.h" +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_gl_interop.h" +#endif +#endif diff --git a/external/hip/hip/hip_hcc.h b/external/hip/hip/hip_hcc.h new file mode 100644 index 0000000..9e0cfad --- /dev/null +++ b/external/hip/hip/hip_hcc.h @@ -0,0 +1,24 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_HCC_H +#define HIP_INCLUDE_HIP_HIP_HCC_H +#warning "hip/hip_hcc.h is deprecated, please use hip/hip_ext.h" +#include "hip/hip_ext.h" +#endif // #ifndef HIP_INCLUDE_HIP_HIP_HCC_H diff --git a/external/hip/hip/hip_math_constants.h b/external/hip/hip/hip_math_constants.h new file mode 100644 index 0000000..269767e --- /dev/null +++ b/external/hip/hip/hip_math_constants.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIP_MATH_CONSTANTS_H +#define HIP_MATH_CONSTANTS_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/amd_detail/amd_hip_math_constants.h" +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_math_constants.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +#endif diff --git a/external/hip/hip/hip_profile.h b/external/hip/hip/hip_profile.h new file mode 100644 index 0000000..4fef521 --- /dev/null +++ b/external/hip/hip/hip_profile.h @@ -0,0 +1,27 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H +#define HIP_INCLUDE_HIP_HIP_PROFILE_H + +#define HIP_SCOPED_MARKER(markerName, group) +#define HIP_BEGIN_MARKER(markerName, group) +#define HIP_END_MARKER() + +#endif diff --git a/external/hip/hip/hip_runtime.h b/external/hip/hip/hip_runtime.h new file mode 100644 index 0000000..7834d0e --- /dev/null +++ b/external/hip/hip/hip_runtime.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! HIP = Heterogeneous-compute Interface for Portability +//! +//! Define an extremely thin runtime layer that allows source code to be compiled unmodified +//! through either AMD CLANG or NVCC. Key features tend to be in the spirit +//! and terminology of CUDA, but with a portable path to other accelerators as well: +// +//! Both paths support rich C++ features including classes, templates, lambdas, etc. +//! Runtime API is C +//! Memory management is based on pure pointers and resembles malloc/free/copy. +// +//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch +//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ +//! features. 
+ +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_H + +#if !defined(__HIPCC_RTC__) +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: +#if __cplusplus +#include +#include +#else +#include +#include +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) + +#include +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) +#include + +#endif diff --git a/external/hip/hip/hip_runtime_api.h b/external/hip/hip/hip_runtime_api.h new file mode 100644 index 0000000..5148481 --- /dev/null +++ b/external/hip/hip/hip_runtime_api.h @@ -0,0 +1,10431 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + +* @file hip_runtime_api.h + * + * @brief Defines the API signatures for HIP runtime. + * This file can be compiled with a standard compiler. + */ + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H + +#if __cplusplus +#include +#include +#include +#include +#else +#include +#include +#include +#endif + +#include +#include +#include + +enum { + HIP_SUCCESS = 0, + HIP_ERROR_INVALID_VALUE, + HIP_ERROR_NOT_INITIALIZED, + HIP_ERROR_LAUNCH_OUT_OF_RESOURCES +}; +// hack to get these to show up in Doxygen: +/** + * @defgroup GlobalDefs Global enum and defines + * @{ + * + */ +/** + * hipDeviceArch_t + * + */ +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. + + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). 
+ unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + +typedef struct hipUUID_t { + char bytes[16]; +} hipUUID; + +//--- +// Common headers for both NVCC and HIP-Clang paths: + +#define hipGetDeviceProperties hipGetDevicePropertiesR0600 +#define hipDeviceProp_t hipDeviceProp_tR0600 +#define hipChooseDevice hipChooseDeviceR0600 + +/** + * hipDeviceProp + * + */ +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + hipUUID uuid; ///< UUID of a device + char luid[8]; ///< 8-byte unique identifier. Only valid on windows + unsigned int luidDeviceNodeMask; ///< LUID node mask + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + ///< pitched memory + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + size_t totalConstMem; ///< Size of shared constant memory region on the device + ///< (in bytes). + int major; ///< Major compute capability version. This indicates the core instruction set + ///< of the GPU architecture. For example, a value of 11 would correspond to + ///< Navi III (RDNA3). 
See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability version. This indicates a particular configuration, + ///< feature set, or variation within the group represented by the major compute + ///< capability version. For example, different models within the same major version + ///< might have varying levels of support for certain features or optimizations. + ///< See the arch feature flags for portable ways to query feature caps. + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to pitched memory + int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int integrated; ///< APU vs dGPU + int canMapHostMemory; ///< Check whether HIP can map host memory + int computeMode; ///< Compute mode. 
+ int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images + int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory + int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims + int maxTextureCubemap; ///< Maximum cubemap texture dims + int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images + int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images + int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims + int maxSurface1D; ///< Maximum 1D surface size + int maxSurface2D[2]; ///< Maximum 2D surface size + int maxSurface3D[3]; ///< Maximum 3D surface size + int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size + int maxSurface2DLayered[3]; ///< Maximum 2D layered surface size + int maxSurfaceCubemap; ///< Maximum cubemap surface size + int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size + size_t surfaceAlignment; ///< Alignment requirement for surface + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int ECCEnabled; ///< Device has ECC support enabled + int pciBusID; ///< PCI Bus ID. 
+ int pciDeviceID; ///< PCI Device ID + int pciDomainID; ///< PCI Domain ID + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int asyncEngineCount; ///< Number of async engines + int unifiedAddressing; ///< Does device and host share unified address space + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + int l2CacheSize; ///< L2 cache size. + int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int streamPrioritiesSupported; ///< Device supports stream priority + int globalL1CacheSupported; ///< Indicates globals are cached in L1 + int localL1CacheSupported; ///< Locals are cached in L1 + size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor. + int regsPerMultiprocessor; ///< registers available per multiprocessor + int managedMemory; ///< Device supports allocating managed memory on this system + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU + int hostNativeAtomicSupported; ///< Link between host and device supports native atomics + int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only. 
+ int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int computePreemptionSupported; ///< Is compute preemption supported on the device + int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same + ///< address as the host + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + size_t sharedMemPerBlockOptin; ///< Per device max shared mem per block usable by special opt in + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU + int accessPolicyMaxWindowSize; ///< Max value of access policy window + size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block + int hostRegisterSupported; ///< Device supports hipHostRegister + int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays + int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag + ///< with hipHostRegister + int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support + int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs + int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs + unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to + ///< hipFlushGPUDirectRDMAWritesOptions + int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering + unsigned int + memoryPoolSupportedHandleTypes; ///< 
Bitmask of handle types support with mempool based IPC + int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP + ///< mipmapped arrays + int ipcEventSupported; ///< Device supports IPC events + int clusterLaunch; ///< Device supports cluster launch + int unifiedFunctionPointers; ///< Indicates device supports unified function pointers + int reserved[63]; ///< CUDA Reserved. + + int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA. + + /* HIP Only struct members */ + char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device +} hipDeviceProp_t; + +/** + * hipMemoryType (for pointer attributes) + * + * @note hipMemoryType enum values are combination of cudaMemoryType and cuMemoryType and AMD + * specific enum values. 
+ * + */ +typedef enum hipMemoryType { + hipMemoryTypeUnregistered = 0, ///< Unregistered memory + hipMemoryTypeHost = 1, ///< Memory is physically located on host + hipMemoryTypeDevice = 2, ///< Memory is physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeManaged = 3, ///< Managed memory, automatically managed by the unified + ///< memory system + ///< place holder for new values. + hipMemoryTypeArray = 10, ///< Array memory, physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeUnified = 11 ///< unified address space + +} hipMemoryType; + +/** + * Pointer attributes + */ +typedef struct hipPointerAttribute_t { + enum hipMemoryType type; + int device; + void* devicePointer; + void* hostPointer; + int isManaged; + unsigned allocationFlags; /* flags specified when memory was allocated*/ + /* peers? */ +} hipPointerAttribute_t; + +// Ignoring error-code return values from hip APIs is discouraged. On C++17, +// we can make that yield a warning +#if __cplusplus >= 201703L +#define __HIP_NODISCARD [[nodiscard]] +#else +#define __HIP_NODISCARD +#endif + +/** + * HIP error type + * + */ +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in +// NVCC and HIP-Clang paths Also update the hipCUDAErrorTohipError function in NVCC path. + +typedef enum __HIP_NODISCARD hipError_t { + hipSuccess = 0, ///< Successful completion. + hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL + ///< or not in an acceptable range. + hipErrorOutOfMemory = 2, ///< out of memory range. + // Deprecated + hipErrorMemoryAllocation = 2, ///< Memory allocation error. 
+ hipErrorNotInitialized = 3, ///< Invalid not initialized + // Deprecated + hipErrorInitializationError = 3, + hipErrorDeinitialized = 4, ///< Deinitialized + hipErrorProfilerDisabled = 5, + hipErrorProfilerNotInitialized = 6, + hipErrorProfilerAlreadyStarted = 7, + hipErrorProfilerAlreadyStopped = 8, + hipErrorInvalidConfiguration = 9, ///< Invalid configuration + hipErrorInvalidPitchValue = 12, ///< Invalid pitch value + hipErrorInvalidSymbol = 13, ///< Invalid symbol + hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer + hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction + hipErrorInsufficientDriver = 35, + hipErrorMissingConfiguration = 52, + hipErrorPriorLaunchFailure = 53, + hipErrorInvalidDeviceFunction = 98, ///< Invalid device function + hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices + hipErrorInvalidDevice = 101, ///< DeviceID must be in range from 0 to compute-devices. + hipErrorInvalidImage = 200, ///< Invalid image + hipErrorInvalidContext = 201, ///< Produced when input context is invalid. + hipErrorContextAlreadyCurrent = 202, + hipErrorMapFailed = 205, + // Deprecated + hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr. + hipErrorUnmapFailed = 206, + hipErrorArrayIsMapped = 207, + hipErrorAlreadyMapped = 208, + hipErrorNoBinaryForGpu = 209, + hipErrorAlreadyAcquired = 210, + hipErrorNotMapped = 211, + hipErrorNotMappedAsArray = 212, + hipErrorNotMappedAsPointer = 213, + hipErrorECCNotCorrectable = 214, + hipErrorUnsupportedLimit = 215, ///< Unsupported limit + hipErrorContextAlreadyInUse = 216, ///< The context is already in use + hipErrorPeerAccessUnsupported = 217, + hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX + hipErrorInvalidGraphicsContext = 219, + hipErrorInvalidSource = 300, ///< Invalid source. + hipErrorFileNotFound = 301, ///< the file is not found. 
+ hipErrorSharedObjectSymbolNotFound = 302, + hipErrorSharedObjectInitFailed = 303, ///< Failed to initialize shared object. + hipErrorOperatingSystem = 304, ///< Not the correct operating system + hipErrorInvalidHandle = 400, ///< Invalid handle + // Deprecated + hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid. + hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation. + hipErrorNotFound = 500, ///< Not found + hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not + ///< ready. This is not actually an error, but is used to distinguish + ///< from hipSuccess (which indicates completion). APIs that return + ///< this error include hipEventQuery and hipStreamQuery. + hipErrorIllegalAddress = 700, + hipErrorLaunchOutOfResources = 701, ///< Out of resources error. + hipErrorLaunchTimeOut = 702, ///< Timeout for the launch. + hipErrorPeerAccessAlreadyEnabled = 704, ///< Peer access was already enabled from the current + ///< device. + hipErrorPeerAccessNotEnabled = 705, ///< Peer access was never enabled from the current device. + hipErrorSetOnActiveProcess = 708, ///< The process is active. + hipErrorContextIsDestroyed = 709, ///< The context is already destroyed + hipErrorAssert = 710, ///< Produced when the kernel calls assert. + hipErrorHostMemoryAlreadyRegistered = 712, ///< Produced when trying to lock a page-locked + ///< memory. + hipErrorHostMemoryNotRegistered = 713, ///< Produced when trying to unlock a non-page-locked + ///< memory. + hipErrorLaunchFailure = 719, ///< An exception occurred on the device while executing a kernel. + hipErrorCooperativeLaunchTooLarge = 720, ///< This error indicates that the number of blocks + ///< launched per grid for a kernel that was launched + ///< via cooperative launch APIs exceeds the maximum + ///< number of allowed blocks for the current device. 
+ hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented + hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream + ///< is capturing. + hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream + ///< has been invalidated due to a previous error. + hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of + ///< two independent capture sequences. + hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream. + hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not + ///< joined to the primary stream. + hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses + ///< the capture sequence boundary. Only implicit + ///< in-stream ordering dependencies are allowed + ///< to cross the boundary + hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed + ///< implicit dependency on a current capture sequence + ///< from hipStreamLegacy. + hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last + ///< recorded in a capturing stream. + hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with + ///< the hipStreamCaptureModeRelaxed argument to + ///< hipStreamBeginCapture was passed to + ///< hipStreamEndCapture in a different thread. + hipErrorGraphExecUpdateFailure = 910, ///< This error indicates that the graph update was + ///< not performed because it included changes which + ///< violated constraints specific to instantiated graph + ///< update. + hipErrorInvalidChannelDescriptor = 911, ///< Invalid channel descriptor. + hipErrorInvalidTexture = 912, ///< Invalid texture. + hipErrorUnknown = 999, ///< Unknown error. + // HSA Runtime Error Codes start here. + hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. 
Typically not seen + ///< in production systems. + hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically + ///< not seen in production systems. + hipErrorTbd ///< Marker that more error codes are needed. +} hipError_t; + +#undef __HIP_NODISCARD + +/** + * hipDeviceAttribute_t + * hipDeviceAttributeUnused number: 5 + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeCudaCompatibleBegin = 0, + + hipDeviceAttributeEccEnabled = + hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. + hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window + ///< policy in bytes. + hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number. + hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address + ///< space + hipDeviceAttributeCanUseHostPointerForRegisteredMem, ///< Device can access host registered + ///< memory at the same virtual address as + ///< the CPU + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels + ///< concurrently. + hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory + ///< concurrently with the CPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple + ///< devices + hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel. + ///< Deprecated. Use instead asyncEngineCount. 
+ hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on + ///< the device without migration + hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1 + hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports + ///< native atomic operations + hipDeviceAttributeIntegrated, ///< Device is integrated GPU + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. + hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 + ///< cache. + hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported + hipDeviceAttributeLuid, ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system + hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor + hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. + hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. + hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. + hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. + hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. + hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. + hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. + hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. + hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. + hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. 
Maximum dimensions of 2D layered surface. + hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. + hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. + hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered + ///< surface. + hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. + hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture. + hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear + ///< texture. Use cudaDeviceGetTexture1DLinearMaxWidth() + ///< instead on Cuda. + hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture. + hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. + hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D texture. + hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations + ///< performed. + hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture. + hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D + ///< textures bound to pitched memory. + hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture. + hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. + hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. + hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. + hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture. + hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture + hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
+ hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. + hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU + ///< board + hipDeviceAttributeMultiprocessorCount, ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName + hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory + ///< via the host's page tables + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. Returns pcie slot id + hipDeviceAttributePciDomainId, ///< PCI Domain Id. + hipDeviceAttributePciDomainID = + hipDeviceAttributePciDomainId, ///< PCI Domain ID, for backward compatibility. + hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes + hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This + ///< number is shared by all thread blocks simultaneously + ///< resident on a multiprocessor. + hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per block.
+ hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per + ///< block. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in + ///< bytes. + hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special + ///< opt in. + hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor. + hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single + ///< precision to double precision. + hipDeviceAttributeStreamPrioritiesSupported, ///< Whether to support stream priorities. + hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces + hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver + hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures + hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture + ///< references bound to pitched memory; + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeTotalGlobalMem, ///< Global memory available on device. + hipDeviceAttributeUnifiedAddressing, ///< Cuda only. A unified address space shared with the + ///< host. + hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid + hipDeviceAttributeWarpSize, ///< Warp size in threads.
+ hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator + hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory + ///< management + hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via + ///< hipHostRegister + hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream + ///< Ordered Memory Allocator + hipDeviceAttributeHostNumaId, ///< NUMA ID of the cpu node closest to the device, + ///< or -1 when NUMA isn't supported + + hipDeviceAttributeCudaCompatibleEnd = 9999, + hipDeviceAttributeAmdSpecificBegin = 10000, + + hipDeviceAttributeClockInstructionRate = + hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the + ///< device-side "clock*" + hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor. + hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch + hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName + hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< functions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< grid dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< block dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< shared memories + hipDeviceAttributeIsLargeBar, ///< 
Whether it is LargeBar + hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device + hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and + ///< hipStreamWaitValue64(), '0' otherwise. + hipDeviceAttributeImageSupport, ///< '1' if Device supports image, '0' otherwise. + hipDeviceAttributePhysicalMultiProcessorCount, ///< All available physical compute + ///< units for the device + hipDeviceAttributeFineGrainSupport, ///< '1' if Device supports fine grain, '0' otherwise + hipDeviceAttributeWallClockRate, ///< Constant frequency of wall clock in kilohertz. + hipDeviceAttributeNumberOfXccs, ///< The number of XCC(s) on the device + hipDeviceAttributeMaxAvailableVgprsPerThread, ///< Max number of available (directly or + ///< indirectly addressable) VGPRs per thread in + ///< DWORDs. + hipDeviceAttributePciChipId, ///< GPU Manufacturer device id + hipDeviceAttributeExpertSchedMode, ///< '1' if Device supports expert scheduling mode, + ///< '0' otherwise. + + hipDeviceAttributeAmdSpecificEnd = 19999, + hipDeviceAttributeVendorSpecificBegin = 20000, + // Extended attributes for vendors +} hipDeviceAttribute_t; + +// Flags that can be used with hipGetProcAddress. +/** Default flag. Equivalent to HIP_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM if compiled with + * -fgpu-default-stream=per-thread flag or HIP_API_PER_THREAD_DEFAULT_STREAM macro is + * defined.*/ +#define HIP_GET_PROC_ADDRESS_DEFAULT 0x0 + +/** Search for all symbols except the corresponding per-thread versions.*/ +#define HIP_GET_PROC_ADDRESS_LEGACY_STREAM 0x1 + +/** Search for all symbols including the per-thread versions. 
If a per-thread version cannot be + * found, returns the legacy version.*/ +#define HIP_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM 0x2 + +typedef enum hipDriverProcAddressQueryResult { + HIP_GET_PROC_ADDRESS_SUCCESS = 0, + HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1, + HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2 +} hipDriverProcAddressQueryResult; + +enum hipComputeMode { + hipComputeModeDefault = 0, + hipComputeModeExclusive = 1, + hipComputeModeProhibited = 2, + hipComputeModeExclusiveProcess = 3 +}; + +enum hipFlushGPUDirectRDMAWritesOptions { + hipFlushGPUDirectRDMAWritesOptionHost = 1 << 0, + hipFlushGPUDirectRDMAWritesOptionMemOps = 1 << 1 +}; + +enum hipGPUDirectRDMAWritesOrdering { + hipGPUDirectRDMAWritesOrderingNone = 0, + hipGPUDirectRDMAWritesOrderingOwner = 100, + hipGPUDirectRDMAWritesOrderingAllDevices = 200 +}; + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif +#include <hip/amd_detail/host_defines.h> +#include <hip/driver_types.h> +#include <hip/texture_types.h> +#include <hip/surface_types.h> +#if defined(_MSC_VER) +#define HIP_DEPRECATED(msg) __declspec(deprecated(msg)) +#else // !defined(_MSC_VER) +#define HIP_DEPRECATED(msg) __attribute__((deprecated(msg))) +#endif // !defined(_MSC_VER) +#define HIP_DEPRECATED_MSG \ + "This API is marked as deprecated and might not be supported in future releases.
For more " \ + "details please refer " \ + "https://github.com/ROCm/HIP/blob/develop/docs/reference/deprecated_api_list.md" +#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) +#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) +#define HIP_LAUNCH_PARAM_END ((void*)0x03) +#ifdef __cplusplus +#define __dparm(x) = x +#else +#define __dparm(x) +#endif +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#ifdef __cplusplus +namespace hip_impl { +hipError_t hip_init(); +} // namespace hip_impl +#endif +// Structure definitions: +#ifdef __cplusplus +extern "C" { +#endif +//--- +// API-visible structures +typedef struct ihipCtx_t* hipCtx_t; +// Note many APIs also use integer deviceIds as an alternative to the device pointer: +typedef int hipDevice_t; +typedef enum hipDeviceP2PAttr { + hipDevP2PAttrPerformanceRank = 0, + hipDevP2PAttrAccessSupported, + hipDevP2PAttrNativeAtomicSupported, + hipDevP2PAttrHipArrayAccessSupported +} hipDeviceP2PAttr; +typedef enum hipDriverEntryPointQueryResult { + hipDriverEntryPointSuccess = 0, + hipDriverEntryPointSymbolNotFound = 1, + hipDriverEntryPointVersionNotSufficent = 2 +} hipDriverEntryPointQueryResult; +typedef struct ihipStream_t* hipStream_t; +#define hipIpcMemLazyEnablePeerAccess 0x01 +#define HIP_IPC_HANDLE_SIZE 64 +typedef struct hipIpcMemHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcMemHandle_t; +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; +typedef struct ihipModule_t* hipModule_t; +typedef struct ihipModuleSymbol_t* hipFunction_t; +typedef struct ihipLinkState_t* hipLinkState_t; +typedef struct ihipLibrary_t* hipLibrary_t; +typedef struct ihipKernel_t* hipKernel_t; +/** + * HIP memory pool + */ +typedef struct ihipMemPoolHandle_t* hipMemPool_t; + +typedef struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int 
numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +} hipFuncAttributes; +typedef struct ihipEvent_t* hipEvent_t; + +/** + * hipLimit + * + * @note In HIP device limit-related APIs, any input limit value other than those defined in the + * enum is treated as "UnsupportedLimit" by default. + */ +enum hipLimit_t { + hipLimitStackSize = 0x0, ///< Limit of stack size in bytes on the current device, per + ///< thread. The size is in units of 256 dwords, up to the + ///< limit of (128K - 16) + hipLimitPrintfFifoSize = 0x01, ///< Size limit in bytes of fifo used by printf call on the + ///< device. Currently not supported + hipLimitMallocHeapSize = 0x02, ///< Limit of heap size in bytes on the current device, should + ///< be less than the global memory size on the device + hipExtLimitScratchMin = 0x1000, ///< Minimum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchMax = 0x1001, ///< Maximum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchCurrent = 0x1002, ///< Current scratch limit threshold in bytes on this + ///< device. Must be between hipExtLimitScratchMin and + ///< hipExtLimitScratchMaxValid values. Valid only on Rocm + ///< device. This can be modified. + hipLimitRange ///< Supported limit range +}; + +/** + * Flags that can be used with hipStreamCreateWithFlags. + */ +// Flags that can be used with hipStreamCreateWithFlags. +/** Default stream creation flags. These are used with hipStreamCreate().*/ +#define hipStreamDefault 0x00 + +/** Stream does not implicitly synchronize with null stream.*/ +#define hipStreamNonBlocking 0x01 + +// Flags that can be used with hipEventCreateWithFlags. +/** Default flags.*/ +#define hipEventDefault 0x0 + +/** Waiting will yield CPU. 
Power-friendly and usage-friendly but may increase latency.*/ +#define hipEventBlockingSync 0x1 + +/** Disable event's capability to record timing information. May improve performance.*/ +#define hipEventDisableTiming 0x2 + +/** Event can support IPC. hipEventDisableTiming also must be set.*/ +#define hipEventInterprocess 0x4 + +// Flags that can be used with hipEventRecordWithFlags. +/** Default flag. */ +#define hipEventRecordDefault 0x00 + +/** Event is captured in the graph as an external event node when performing stream capture. */ +#define hipEventRecordExternal 0x01 + +//Flags that can be used with hipStreamWaitEvent. +/** Default flag. */ +#define hipEventWaitDefault 0x00 + +/** Wait is captured in the graph as an external event node when performing stream capture. */ +#define hipEventWaitExternal 0x01 + +/** Disable performing a system scope sequentially consistent memory fence when the event + * transitions from recording to recorded. This can be used for events that are only being + * used to measure timing, and do not require the event inspection operations + * (see ::hipEventSynchronize, ::hipEventQuery, and ::hipEventElapsedTime) to synchronize-with + * the work on which the recorded event (see ::hipEventRecord) is waiting. + * On some AMD GPU devices this can improve the accuracy of timing measurements by avoiding the + * cost of cache writeback and invalidation, and the performance impact of those actions on the + * execution of following work. */ +#define hipEventDisableSystemFence 0x20000000 + +/** Use a device-scope release when recording this event. This flag is useful to obtain more + * precise timings of commands between events. The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToDevice 0x40000000 + +/** Use a system-scope release when recording this event. This flag is useful to make + * non-coherent host memory visible to the host. 
The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToSystem 0x80000000 + +// Flags that can be used with hipGetDriverEntryPoint. +/** Default flag. Equivalent to hipEnablePerThreadDefaultStream if compiled with + * -fgpu-default-stream=per-thread flag or HIP_API_PER_THREAD_DEFAULT_STREAM macro is + * defined.*/ +#define hipEnableDefault 0x0 + +/** Search for all symbols except the corresponding per-thread versions.*/ +#define hipEnableLegacyStream 0x1 + +/** Search for all symbols including the per-thread versions. If a per-thread version cannot be + * found, returns the legacy version.*/ +#define hipEnablePerThreadDefaultStream 0x2 + +// Flags that can be used with hipHostMalloc/hipHostAlloc. +/** Default pinned memory allocation on the host.*/ +#define hipHostAllocDefault 0x0 + +/** Default pinned memory allocation on the host. + * @note This is the same definition as #hipHostAllocDefault.*/ +#define hipHostMallocDefault 0x0 + +/** Memory is considered allocated by all contexts.*/ +#define hipHostAllocPortable 0x1 + +/** Memory is considered allocated by all contexts. + * @note This is the same definition as #hipHostAllocPortable.*/ +#define hipHostMallocPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostAllocMapped 0x2 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer. + * @note This is the same definition as #hipHostAllocMapped.*/ +#define hipHostMallocMapped 0x2 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory.
+ * @note This flag is only for CUDA source compatibility but not functional within HIP runtime, + * because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostAllocWriteCombined 0x4 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory. + * @note This flag is the same definition as #hipHostAllocWriteCombined which is equivalent to + * cudaHostAllocWriteCombined. It is only for CUDA source compatibility but not functional within + * HIP runtime, because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostMallocWriteCombined 0x4 + +/** + * Host memory will be forcedly allocated on extended fine grained system memory + * pool which is with MTYPE_UC. + * @note This allocation flag is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipHostMallocUncached 0x10000000 +#define hipHostAllocUncached hipHostMallocUncached + +/** + * Host memory allocation will follow numa policy set by user. + * @note This numa allocation flag is applicable on Linux, under development on Windows. + */ +#define hipHostMallocNumaUser 0x20000000 + +/** Allocate coherent memory. Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocCoherent 0x40000000 + +/** Allocate non-coherent memory. 
Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocNonCoherent 0x80000000 + +/** Memory can be accessed by any stream on any device*/ +#define hipMemAttachGlobal 0x01 + +/** Memory cannot be accessed by any stream on any device.*/ +#define hipMemAttachHost 0x02 + +/** Memory can only be accessed by a single stream on the associated device.*/ +#define hipMemAttachSingle 0x04 + +#define hipDeviceMallocDefault 0x0 + +/** Memory is allocated in fine grained region of device.*/ +#define hipDeviceMallocFinegrained 0x1 + +/** Memory represents a HSA signal.*/ +#define hipMallocSignalMemory 0x2 + +/** Memory allocated will be uncached. */ +#define hipDeviceMallocUncached 0x3 + +/** Memory allocated will be contiguous. */ +#define hipDeviceMallocContiguous 0x4 + +// Flags that can be used with hipHostRegister. +/** Memory is Mapped and Portable.*/ +#define hipHostRegisterDefault 0x0 + +/** Memory is considered registered by all contexts.*/ +#define hipHostRegisterPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostRegisterMapped 0x2 + +/** The passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. + * belonging to a third-party PCIe device, and it will be marked as non cache-coherent and + * contiguous. + * */ +#define hipHostRegisterIoMemory 0x4 + +/** This flag is ignored On AMD devices.*/ +#define hipHostRegisterReadOnly 0x08 + +/** Coarse Grained host memory lock.*/ +#define hipExtHostRegisterCoarseGrained 0x8 + +/** Map host memory onto extended fine grained access host memory pool when enabled. + * It is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipExtHostRegisterUncached 0x80000000 + +/** Automatically select between Spin and Yield.*/ +#define hipDeviceScheduleAuto 0x0 + +/** Dedicate a CPU core to spin-wait. 
Provides lowest latency, but burns a CPU core and may + * consume more power.*/ +#define hipDeviceScheduleSpin 0x1 + +/** Yield the CPU to the operating system when waiting. May increase latency, but lowers power + * and is friendlier to other threads in the system.*/ +#define hipDeviceScheduleYield 0x2 +#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x10 +/** Default HIP array allocation flag.*/ +#define hipArrayDefault 0x00 +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 +#define hipOccupancyDefault 0x00 +#define hipOccupancyDisableCachingOverride 0x01 +#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 +#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 +#define hipCpuDeviceId ((int)-1) +#define hipInvalidDeviceId ((int)-2) +// Flags that can be used with hipExtLaunch Set of APIs. +/** AnyOrderLaunch of kernels.*/ +#define hipExtAnyOrderLaunch 0x01 +// Flags to be used with hipStreamWaitValue32 and hipStreamWaitValue64. +#define hipStreamWaitValueGte 0x0 +#define hipStreamWaitValueEq 0x1 +#define hipStreamWaitValueAnd 0x2 +#define hipStreamWaitValueNor 0x3 + +/** Operations for hipStreamBatchMemOp*/ +typedef enum hipStreamBatchMemOpType { + hipStreamMemOpWaitValue32 = 0x1, + hipStreamMemOpWriteValue32 = 0x2, + hipStreamMemOpWaitValue64 = 0x4, + hipStreamMemOpWriteValue64 = 0x5, + hipStreamMemOpBarrier = 0x6, ///< Currently not supported + hipStreamMemOpFlushRemoteWrites = 0x3 ///< Currently not supported +} hipStreamBatchMemOpType; + +/** + * @brief Union representing batch memory operation parameters for HIP streams. + * + * hipStreamBatchMemOpParams is used to specify the parameters for batch memory + * operations in a HIP stream. This union supports various operations including + * waiting for a specific value, writing a value, and different flags for wait conditions. 
+ * + * @details + * The union includes fields for different types of operations defined in the + * enum hipStreamBatchMemOpType: + * - hipStreamMemOpWaitValue32: Wait for a 32-bit value. + * - hipStreamMemOpWriteValue32: Write a 32-bit value. + * - hipStreamMemOpWaitValue64: Wait for a 64-bit value. + * - hipStreamMemOpWriteValue64: Write a 64-bit value. + * + * Each operation type includes an address, the value to wait for or write, flags, and an + * optional alias that is not relevant on AMD GPUs. Flags can be used to specify different + * wait conditions such as equality, bitwise AND, greater than or equal, and bitwise NOR. + * + * Example usage: + * @code + * hipStreamBatchMemOpParams myArray[2]; + * myArray[0].operation = hipStreamMemOpWaitValue32; + * myArray[0].waitValue.address = waitAddr1; + * myArray[0].waitValue.value = 0x1; + * myArray[0].waitValue.flags = CU_STREAM_WAIT_VALUE_EQ; + * + * myArray[1].operation = hipStreamMemOpWriteValue32; + * myArray[1].writeValue.address = writeAddr1; + * myArray[1].writeValue.value = 0x1; + * myArray[1].writeValue.flags = 0x0; + * + * result = hipStreamBatchMemOp(stream, 2, myArray, 0); + * @endcode + */ + +typedef union hipStreamBatchMemOpParams_union { + hipStreamBatchMemOpType operation; + struct hipStreamMemOpWaitValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant + } waitValue; + struct hipStreamMemOpWriteValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend. 
Initial value is unimportant + } writeValue; + struct hipStreamMemOpFlushRemoteWritesParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } flushRemoteWrites; ///< Currently not supported on AMD + struct hipStreamMemOpMemoryBarrierParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } memoryBarrier; ///< Currently not supported on AMD + uint64_t pad[6]; +} hipStreamBatchMemOpParams; + +/** + * @brief Structure representing node parameters for batch memory operations in HIP graphs. + * + * hipBatchMemOpNodeParams is used to specify the parameters for batch memory + * operations in HIP graphs. This struct includes the context to use for the operations, the + * number of operations, and an array of hipStreamBatchMemOpParams that describe the operations. + * + * @details + * The structure includes the following fields: + * - ctx: The HIP context to use for the operations. + * - count: The number of operations in the paramArray. + * - paramArray: A pointer to an array of hipStreamBatchMemOpParams. + * - flags: Flags to control the node. 
+ * + * Example usage: + * @code + * hipBatchMemOpNodeParams nodeParams; + * nodeParams.ctx = context; + * nodeParams.count = ARRAY_SIZE; + * nodeParams.paramArray = myArray; + * nodeParams.flags = 0; + * + * Pass nodeParams to a HIP graph APIs hipGraphAddBatchMemOpNode, hipGraphBatchMemOpNodeGetParams, + * hipGraphBatchMemOpNodeSetParams, hipGraphExecBatchMemOpNodeSetParams + * @endcode + */ + +typedef struct hipBatchMemOpNodeParams { + hipCtx_t ctx; + unsigned int count; + hipStreamBatchMemOpParams* paramArray; + unsigned int flags; +} hipBatchMemOpNodeParams; + +// Stream per thread +/** Implicit stream per application thread.*/ +#define hipStreamPerThread ((hipStream_t)2) + +#define hipStreamLegacy ((hipStream_t)1) + +// Indicates that the external memory object is a dedicated resource +#define hipExternalMemoryDedicated 0x1 +/** + * HIP Memory Advise values + * + * @note This memory advise enumeration is used on Linux, not Windows. + */ +typedef enum hipMemoryAdvise { + hipMemAdviseSetReadMostly = 1, ///< Data will mostly be read and only occassionally + ///< be written to + hipMemAdviseUnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly + hipMemAdviseSetPreferredLocation = 3, ///< Set the preferred location for the data as + ///< the specified device + hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data + hipMemAdviseSetAccessedBy = 5, ///< Data will be accessed by the specified device + ///< so prevent page faults as much as possible + hipMemAdviseUnsetAccessedBy = 6, ///< Let HIP to decide on the page faulting policy + ///< for the specified device + hipMemAdviseSetCoarseGrain = 100, ///< The default memory model is fine-grain. That allows + ///< coherent operations between host and device, while + ///< executing kernels. 
The coarse-grain can be used + ///< for data that only needs to be coherent at dispatch + ///< boundaries for better performance + hipMemAdviseUnsetCoarseGrain = 101 ///< Restores cache coherency policy back to fine-grain +} hipMemoryAdvise; +/** + * HIP Coherency Mode + */ +typedef enum hipMemRangeCoherencyMode { + hipMemRangeCoherencyModeFineGrain = 0, ///< Updates to memory with this attribute can be + ///< done coherently from all devices + hipMemRangeCoherencyModeCoarseGrain = 1, ///< Writes to memory with this attribute can be + ///< performed by a single device at a time + hipMemRangeCoherencyModeIndeterminate = 2 ///< Memory region queried contains subregions with + ///< both hipMemRangeCoherencyModeFineGrain and + ///< hipMemRangeCoherencyModeCoarseGrain attributes +} hipMemRangeCoherencyMode; +/** + * HIP range attributes + */ +typedef enum hipMemRangeAttribute { + hipMemRangeAttributeReadMostly = 1, ///< Whether the range will mostly be read and + ///< only occasionally be written to + hipMemRangeAttributePreferredLocation = 2, ///< The preferred location of the range + hipMemRangeAttributeAccessedBy = 3, ///< Memory range has hipMemAdviseSetAccessedBy + ///< set for the specified device + hipMemRangeAttributeLastPrefetchLocation = 4, ///< The last location to where the range was + ///< prefetched + hipMemRangeAttributeCoherencyMode = 100, ///< Returns coherency mode + ///< @ref hipMemRangeCoherencyMode for the range +} hipMemRangeAttribute; + +/** + * HIP memory pool attributes + */ +typedef enum hipMemPoolAttr { + /** + * (value type = int) + * Allow @p hipMemAllocAsync to use memory asynchronously freed + * in other streams as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * hip events and null stream interactions can create the required + * stream ordered dependencies.
(default enabled) + */ + hipMemPoolReuseFollowEventDependencies = 0x1, + /** + * (value type = int) + * Allow reuse of already completed frees when there is no dependency + * between the free and allocation. (default enabled) + */ + hipMemPoolReuseAllowOpportunistic = 0x2, + /** + * (value type = int) + * Allow @p hipMemAllocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by hipFreeAsync (default enabled). + */ + hipMemPoolReuseAllowInternalDependencies = 0x3, + /** + * (value type = uint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + */ + hipMemPoolAttrReleaseThreshold = 0x4, + /** + * (value type = uint64_t) + * Amount of backing memory currently allocated for the mempool. + */ + hipMemPoolAttrReservedMemCurrent = 0x5, + /** + * (value type = uint64_t) + * High watermark of backing memory allocated for the mempool since the + * last time it was reset. High watermark can only be reset to zero. + */ + hipMemPoolAttrReservedMemHigh = 0x6, + /** + * (value type = uint64_t) + * Amount of memory from the pool that is currently in use by the application. + */ + hipMemPoolAttrUsedMemCurrent = 0x7, + /** + * (value type = uint64_t) + * High watermark of the amount of memory from the pool that was in use by the application since + * the last time it was reset. High watermark can only be reset to zero. 
+ */ + hipMemPoolAttrUsedMemHigh = 0x8 +} hipMemPoolAttr; + +/** + * Specifies the memory protection flags for mapping + * + */ +typedef enum hipMemAccessFlags { + hipMemAccessFlagsProtNone = 0, ///< Default, make the address range not accessible + hipMemAccessFlagsProtRead = 1, ///< Set the address range read accessible + hipMemAccessFlagsProtReadWrite = 3 ///< Set the address range read-write accessible +} hipMemAccessFlags; +/** + * Memory access descriptor structure is used to specify memory access + * permissions for a virtual memory region in Virtual Memory Management API. + * This structure changes read, and write permissions for + * specific memory regions. + */ +typedef struct hipMemAccessDesc { + hipMemLocation location; ///< Location on which the accessibility has to change + hipMemAccessFlags flags; ///< Accessibility flags to set +} hipMemAccessDesc; +/** + * Defines the allocation types + */ +typedef enum hipMemAllocationType { + hipMemAllocationTypeInvalid = 0x0, + /** This allocation type is 'pinned', i.e. cannot migrate from its current + * location while the application is actively using it + */ + hipMemAllocationTypePinned = 0x1, + hipMemAllocationTypeManaged = 0x2, + hipMemAllocationTypeUncached = 0x40000000, + hipMemAllocationTypeMax = 0x7FFFFFFF +} hipMemAllocationType; +/** + * Flags for specifying handle types for memory pool allocations + * + */ +typedef enum hipMemAllocationHandleType { + hipMemHandleTypeNone = 0x0, ///< Does not allow any export mechanism + hipMemHandleTypePosixFileDescriptor = + 0x1, ///< Allows a file descriptor for exporting. Permitted only on POSIX systems + hipMemHandleTypeWin32 = 0x2, ///< Allows a Win32 NT handle for exporting. (HANDLE) + hipMemHandleTypeWin32Kmt = 0x4 ///< Allows a Win32 KMT handle for exporting. (D3DKMT_HANDLE) +} hipMemAllocationHandleType; +/** + * Specifies the properties of allocations made from the pool. 
+ */ +typedef struct hipMemPoolProps { + hipMemAllocationType + allocType; ///< Allocation type. Currently must be specified as @p hipMemAllocationTypePinned + hipMemAllocationHandleType + handleTypes; ///< Handle types that will be supported by allocations from the pool + hipMemLocation location; ///< Location where allocations should reside + /** + * Windows-specific LPSECURITYATTRIBUTES required when @p hipMemHandleTypeWin32 is specified + */ + void* win32SecurityAttributes; + size_t maxSize; ///< Maximum pool size. When set to 0, defaults to a system dependent value + unsigned char reserved[56]; ///< Reserved for future use, must be 0 +} hipMemPoolProps; +/** + * Opaque data structure for exporting a pool allocation + */ +typedef struct hipMemPoolPtrExportData { + unsigned char reserved[64]; +} hipMemPoolPtrExportData; + +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncAttribute { + hipFuncAttributeMaxDynamicSharedMemorySize = + 8, ///< The maximum number of bytes requested for dynamically allocated shared memory + hipFuncAttributePreferredSharedMemoryCarveout = + 9, ///< Sets the percentage of total shared memory allocated as the shared memory carveout + hipFuncAttributeMax +} hipFuncAttribute; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncCache_t { + hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) + hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache + hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory + hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory +} hipFuncCache_t; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipSharedMemConfig { + hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. 
+ hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-byte intervals and performs best + ///< when adjacent threads access data 4 bytes apart. + hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best + ///< when adjacent threads access data 8 bytes apart. +} hipSharedMemConfig; +/** + * Struct for data in 3D + */ +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + constexpr __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) + : x(_x), y(_y), z(_z) {}; +#endif +} dim3; +/** + * struct hipLaunchParams_t + */ +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + void** args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; +/** + * struct hipFunctionLaunchParams_t + */ +typedef struct hipFunctionLaunchParams_t { + hipFunction_t function; ///< Kernel to launch + unsigned int gridDimX; ///< Width(X) of grid in blocks + unsigned int gridDimY; ///< Height(Y) of grid in blocks + unsigned int gridDimZ; ///< Depth(Z) of grid in blocks + unsigned int blockDimX; ///< X dimension of each thread block + unsigned int blockDimY; ///< Y dimension of each thread block + unsigned int blockDimZ; ///< Z dimension of each thread block + unsigned int sharedMemBytes; ///< Shared memory + hipStream_t hStream; ///< Stream identifier + void** kernelParams; ///< Kernel parameters +} hipFunctionLaunchParams; +typedef enum hipExternalMemoryHandleType_enum { + hipExternalMemoryHandleTypeOpaqueFd = 1, + hipExternalMemoryHandleTypeOpaqueWin32 = 2, + hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3, + hipExternalMemoryHandleTypeD3D12Heap = 4, + hipExternalMemoryHandleTypeD3D12Resource = 5, + hipExternalMemoryHandleTypeD3D11Resource = 6, + hipExternalMemoryHandleTypeD3D11ResourceKmt = 7, + 
hipExternalMemoryHandleTypeNvSciBuf = 8 +} hipExternalMemoryHandleType; +typedef struct hipExternalMemoryHandleDesc_st { + hipExternalMemoryHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* nvSciBufObject; + } handle; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryHandleDesc; +typedef struct hipExternalMemoryBufferDesc_st { + unsigned long long offset; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryBufferDesc; +typedef struct hipExternalMemoryMipmappedArrayDesc_st { + unsigned long long offset; + hipChannelFormatDesc formatDesc; + hipExtent extent; + unsigned int flags; + unsigned int numLevels; +} hipExternalMemoryMipmappedArrayDesc; +typedef void* hipExternalMemory_t; +typedef enum hipExternalSemaphoreHandleType_enum { + hipExternalSemaphoreHandleTypeOpaqueFd = 1, + hipExternalSemaphoreHandleTypeOpaqueWin32 = 2, + hipExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3, + hipExternalSemaphoreHandleTypeD3D12Fence = 4, + hipExternalSemaphoreHandleTypeD3D11Fence = 5, + hipExternalSemaphoreHandleTypeNvSciSync = 6, + hipExternalSemaphoreHandleTypeKeyedMutex = 7, + hipExternalSemaphoreHandleTypeKeyedMutexKmt = 8, + hipExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9, + hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10 +} hipExternalSemaphoreHandleType; +typedef struct hipExternalSemaphoreHandleDesc_st { + hipExternalSemaphoreHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* NvSciSyncObj; + } handle; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreHandleDesc; +typedef void* hipExternalSemaphore_t; +typedef struct hipExternalSemaphoreSignalParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + } 
keyedMutex; + unsigned int reserved[12]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreSignalParams; +/** + * External semaphore wait parameters, compatible with driver type + */ +typedef struct hipExternalSemaphoreWaitParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + unsigned int timeoutMs; + } keyedMutex; + unsigned int reserved[10]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreWaitParams; + +#if __HIP_HAS_GET_PCH +/** + * Internal use only. This API may change in the future + * Pre-Compiled header for online compilation + */ +void __hipGetPCH(const char** pch, unsigned int* size); +#endif + +/** + * HIP Access flags for Interop resources. + */ +typedef enum hipGraphicsRegisterFlags { + hipGraphicsRegisterFlagsNone = 0, + hipGraphicsRegisterFlagsReadOnly = 1, ///< HIP will not write to this registered resource, read only + hipGraphicsRegisterFlagsWriteDiscard = + 2, ///< HIP will only write and will not read from this registered resource, write only + hipGraphicsRegisterFlagsSurfaceLoadStore = 4, ///< HIP will bind this resource to a surface, read and write + hipGraphicsRegisterFlagsTextureGather = + 8 ///< HIP will perform texture gather operations on this registered resource, read and write or read only +} hipGraphicsRegisterFlags; + +typedef struct _hipGraphicsResource hipGraphicsResource; + +typedef hipGraphicsResource* hipGraphicsResource_t; + +/** + * An opaque value that represents a hip graph + */ +typedef struct ihipGraph* hipGraph_t; +/** + * An opaque value that represents a hip graph node + */ +typedef struct hipGraphNode* hipGraphNode_t; +/** + * An opaque value that represents a hip graph Exec + */ +typedef struct hipGraphExec* hipGraphExec_t; + +/** + * An opaque value that represents a user obj + */ +typedef struct hipUserObject* 
hipUserObject_t; + + +/** + * hipGraphNodeType + */ +typedef enum hipGraphNodeType { + hipGraphNodeTypeKernel = 0, ///< GPU kernel node + hipGraphNodeTypeMemcpy = 1, ///< Memcpy node + hipGraphNodeTypeMemset = 2, ///< Memset node + hipGraphNodeTypeHost = 3, ///< Host (executable) node + hipGraphNodeTypeGraph = 4, ///< Node which executes an embedded graph + hipGraphNodeTypeEmpty = 5, ///< Empty (no-op) node + hipGraphNodeTypeWaitEvent = 6, ///< External event wait node + hipGraphNodeTypeEventRecord = 7, ///< External event record node + hipGraphNodeTypeExtSemaphoreSignal = 8, ///< External Semaphore signal node + hipGraphNodeTypeExtSemaphoreWait = 9, ///< External Semaphore wait node + hipGraphNodeTypeMemAlloc = 10, ///< Memory alloc node + hipGraphNodeTypeMemFree = 11, ///< Memory free node + hipGraphNodeTypeMemcpyFromSymbol = 12, ///< MemcpyFromSymbol node + hipGraphNodeTypeMemcpyToSymbol = 13, ///< MemcpyToSymbol node + hipGraphNodeTypeBatchMemOp = 14, ///< BatchMemOp node + hipGraphNodeTypeCount +} hipGraphNodeType; + +typedef void (*hipHostFn_t)(void* userData); +typedef struct hipHostNodeParams { + hipHostFn_t fn; + void* userData; +} hipHostNodeParams; +typedef struct hipKernelNodeParams { + dim3 blockDim; + void** extra; + void* func; + dim3 gridDim; + void** kernelParams; + unsigned int sharedMemBytes; +} hipKernelNodeParams; +typedef struct hipMemsetParams { + void* dst; + unsigned int elementSize; + size_t height; + size_t pitch; + unsigned int value; + size_t width; +} hipMemsetParams; + +typedef struct hipMemAllocNodeParams { + hipMemPoolProps poolProps; ///< Pool properties, which contain where + ///< the location should reside + const hipMemAccessDesc* accessDescs; ///< The array of memory access descriptors. + size_t accessDescCount; ///< The number of access descriptors. 
+ ///< Must not be bigger than the number of GPUs + size_t bytesize; ///< The size of the requested allocation in bytes + void* dptr; ///< Returned device address of the allocation +} hipMemAllocNodeParams; + +/** + * Specifies performance hint with hipAccessPolicyWindow + */ +typedef enum hipAccessProperty { + hipAccessPropertyNormal = 0, ///< Normal cache persistence. + hipAccessPropertyStreaming = 1, ///< Streaming access is less likely to persist from cache + hipAccessPropertyPersisting = 2, ///< Persisting access is more likely to persist in cache +} hipAccessProperty; + +/*** + * Specifies access policy for a window, a contiguous extent of memory + * beginning at base_ptr and ending at base_ptr + num_bytes. + */ +typedef struct hipAccessPolicyWindow { + void* base_ptr; ///< Starting address of the access policy window + hipAccessProperty hitProp; ///< hipAccessProperty set for hit + float hitRatio; ///< hitRatio specifies percentage of lines assigned hitProp + hipAccessProperty missProp; ///< hipAccessProperty set for miss + size_t num_bytes; ///< Size in bytes of the window policy. +} hipAccessPolicyWindow; + +/** + * Memory Synchronization Domain map + */ +typedef struct hipLaunchMemSyncDomainMap { + unsigned char default_; /**< The default domain ID to use for designated kernels */ + unsigned char remote; /**< The remote domain ID to use for designated kernels */ +} hipLaunchMemSyncDomainMap; + +/** + * Memory Synchronization Domain + */ +typedef enum hipLaunchMemSyncDomain { + hipLaunchMemSyncDomainDefault = 0, /**< Launch kernels in the default domain */ + hipLaunchMemSyncDomainRemote = 1 /**< Launch kernels in the remote domain */ +} hipLaunchMemSyncDomain; + +/** + * Stream Synchronization Policy. + * Can be set with hipStreamSetAttribute + */ +typedef enum hipSynchronizationPolicy { + hipSyncPolicyAuto = 1, /**< Default Synchronization Policy. 
Host thread waits actively */ + hipSyncPolicySpin = 2, /**< Host thread spins in tight loop waiting for completion */ + hipSyncPolicyYield = 3, /**< Host spins but yields to other threads, reducing CPU usage */ + hipSyncPolicyBlockingSync = 4 /**< Host thread blocks (sleeps) until the stream completes */ +} hipSynchronizationPolicy; + +/** + * Launch Attribute ID + */ +typedef enum hipLaunchAttributeID { + hipLaunchAttributeAccessPolicyWindow = 1, ///< Valid for Streams, graph nodes, launches + hipLaunchAttributeCooperative = 2, ///< Valid for graph nodes, launches + hipLaunchAttributeSynchronizationPolicy = 3, ///< Valid for streams + hipLaunchAttributePriority = 8, ///< Valid for graph node, streams, launches + hipLaunchAttributeMemSyncDomainMap = 9, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMemSyncDomain = 10, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMax +} hipLaunchAttributeID; + + +/** + * Launch Attribute Value + */ +typedef union hipLaunchAttributeValue { + char pad[64]; ///< 64 byte padding + hipAccessPolicyWindow + accessPolicyWindow; ///< Value of launch attribute ::hipLaunchAttributeAccessPolicyWindow. + int cooperative; ///< Value of launch attribute ::hipLaunchAttributeCooperative. Indicates + ///< whether the kernel is cooperative. + int priority; ///< Value of launch attribute :: hipLaunchAttributePriority. Execution priority of + ///< kernel + hipSynchronizationPolicy + syncPolicy; ///< Value of launch attribute :: hipLaunchAttributeSynchronizationPolicy. 
Used + ///< to work queued up in stream + hipLaunchMemSyncDomainMap + memSyncDomainMap; ///< Value of launch attribute hipLaunchAttributeMemSyncDomainMap + hipLaunchMemSyncDomain + memSyncDomain; ///< Value of launch attribute hipLaunchAttributeMemSyncDomain +} hipLaunchAttributeValue; + +/** + * Stream attributes + */ +#define hipStreamAttrID hipLaunchAttributeID +#define hipStreamAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipStreamAttributeSynchronizationPolicy hipLaunchAttributeSynchronizationPolicy +#define hipStreamAttributeMemSyncDomainMap hipLaunchAttributeMemSyncDomainMap +#define hipStreamAttributeMemSyncDomain hipLaunchAttributeMemSyncDomain +#define hipStreamAttributePriority hipLaunchAttributePriority + +#define hipStreamAttrValue hipLaunchAttributeValue + +/** + * Kernel node attributeID + */ +#define hipKernelNodeAttrID hipLaunchAttributeID +#define hipKernelNodeAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipKernelNodeAttributeCooperative hipLaunchAttributeCooperative +#define hipKernelNodeAttributePriority hipLaunchAttributePriority + +/** + * Kernel node attribute value + */ +#define hipKernelNodeAttrValue hipLaunchAttributeValue + +/** + * hip Drv attributes + */ +#define hipDrvLaunchAttributeCooperative hipLaunchAttributeCooperative + +#define hipDrvLaunchAttributeID hipLaunchAttributeID +#define hipDrvLaunchAttributeValue hipLaunchAttributeValue +#define hipDrvLaunchAttribute hipLaunchAttribute + +/** + * Graph execution update result + */ +typedef enum hipGraphExecUpdateResult { + hipGraphExecUpdateSuccess = 0x0, ///< The update succeeded + hipGraphExecUpdateError = 0x1, ///< The update failed for an unexpected reason which is described + ///< in the return value of the function + hipGraphExecUpdateErrorTopologyChanged = 0x2, ///< The update failed because the topology changed + hipGraphExecUpdateErrorNodeTypeChanged = 0x3, ///< The update failed because a node type changed + 
hipGraphExecUpdateErrorFunctionChanged = + 0x4, ///< The update failed because the function of a kernel node changed + hipGraphExecUpdateErrorParametersChanged = + 0x5, ///< The update failed because the parameters changed in a way that is not supported + hipGraphExecUpdateErrorNotSupported = + 0x6, ///< The update failed because something about the node is not supported + hipGraphExecUpdateErrorUnsupportedFunctionChange = 0x7 +} hipGraphExecUpdateResult; + +typedef enum hipStreamCaptureMode { + hipStreamCaptureModeGlobal = 0, + hipStreamCaptureModeThreadLocal, + hipStreamCaptureModeRelaxed +} hipStreamCaptureMode; +typedef enum hipStreamCaptureStatus { + hipStreamCaptureStatusNone = 0, ///< Stream is not capturing + hipStreamCaptureStatusActive, ///< Stream is actively capturing + hipStreamCaptureStatusInvalidated ///< Stream is part of a capture sequence that has been + ///< invalidated, but not terminated +} hipStreamCaptureStatus; + +typedef enum hipStreamUpdateCaptureDependenciesFlags { + hipStreamAddCaptureDependencies = 0, ///< Add new nodes to the dependency set + hipStreamSetCaptureDependencies, ///< Replace the dependency set with the new nodes +} hipStreamUpdateCaptureDependenciesFlags; + +typedef enum hipGraphMemAttributeType { + hipGraphMemAttrUsedMemCurrent = + 0, ///< Amount of memory, in bytes, currently associated with graphs + hipGraphMemAttrUsedMemHigh, ///< High watermark of memory, in bytes, associated with graphs since + ///< the last time. + hipGraphMemAttrReservedMemCurrent, ///< Amount of memory, in bytes, currently allocated for + ///< graphs. + hipGraphMemAttrReservedMemHigh, ///< High watermark of memory, in bytes, currently allocated for + ///< graphs +} hipGraphMemAttributeType; +typedef enum hipUserObjectFlags { + hipUserObjectNoDestructorSync = 0x1, ///< Destructor execution is not synchronized. +} hipUserObjectFlags; + +typedef enum hipUserObjectRetainFlags { + hipGraphUserObjectMove = 0x1, ///< Add new reference or retain. 
+} hipUserObjectRetainFlags; + +typedef enum hipGraphInstantiateFlags { + hipGraphInstantiateFlagAutoFreeOnLaunch = + 1, ///< Automatically free memory allocated in a graph before relaunching. + hipGraphInstantiateFlagUpload = 2, ///< Automatically upload the graph after instantiation. + hipGraphInstantiateFlagDeviceLaunch = + 4, ///< Instantiate the graph to be launched from the device. + hipGraphInstantiateFlagUseNodePriority = + 8, ///< Run the graph using the per-node priority attributes rather than the priority of the + ///< stream it is launched into. +} hipGraphInstantiateFlags; + +enum hipGraphDebugDotFlags { + hipGraphDebugDotFlagsVerbose = + 1 << 0, /**< Output all debug data as if every debug flag is enabled */ + hipGraphDebugDotFlagsKernelNodeParams = 1 << 2, /**< Adds hipKernelNodeParams to output */ + hipGraphDebugDotFlagsMemcpyNodeParams = 1 << 3, /**< Adds hipMemcpy3DParms to output */ + hipGraphDebugDotFlagsMemsetNodeParams = 1 << 4, /**< Adds hipMemsetParams to output */ + hipGraphDebugDotFlagsHostNodeParams = 1 << 5, /**< Adds hipHostNodeParams to output */ + hipGraphDebugDotFlagsEventNodeParams = + 1 << 6, /**< Adds hipEvent_t handle from record and wait nodes to output */ + hipGraphDebugDotFlagsExtSemasSignalNodeParams = + 1 << 7, /**< Adds hipExternalSemaphoreSignalNodeParams values to output */ + hipGraphDebugDotFlagsExtSemasWaitNodeParams = + 1 << 8, /**< Adds hipExternalSemaphoreWaitNodeParams to output */ + hipGraphDebugDotFlagsKernelNodeAttributes = + 1 << 9, /**< Adds hipKernelNodeAttrID values to output */ + hipGraphDebugDotFlagsHandles = + 1 << 10 /**< Adds node handles and every kernel function handle to output */ +}; + +/** + * hipGraphInstantiateWithParams results + */ +typedef enum hipGraphInstantiateResult { + hipGraphInstantiateSuccess = 0, /**< Instantiation Success */ + hipGraphInstantiateError = 1, /**< Instantiation failed for an + unexpected reason which is described in the return value of the function */ + 
hipGraphInstantiateInvalidStructure = 2, /**< Instantiation failed due + to invalid structure, such as cycles */ + hipGraphInstantiateNodeOperationNotSupported = 3, /**< Instantiation for device launch failed + because the graph contained an unsupported operation */ + hipGraphInstantiateMultipleDevicesNotSupported = 4, /**< Instantiation for device launch failed + due to the nodes belonging to different contexts */ +} hipGraphInstantiateResult; + +/** + * Graph Instantiation parameters + */ +typedef struct hipGraphInstantiateParams { + hipGraphNode_t errNode_out; /**< The node which caused instantiation to fail, if any*/ + unsigned long long flags; /**< Instantiation flags */ + hipGraphInstantiateResult result_out; /**< Whether instantiation was successful. + If it failed, the reason why */ + hipStream_t uploadStream; /**< Upload stream */ +} hipGraphInstantiateParams; + + +/** + * Memory allocation properties + */ +typedef struct hipMemAllocationProp { + hipMemAllocationType type; ///< Memory allocation type + union { + hipMemAllocationHandleType requestedHandleType; ///< Requested handle type + hipMemAllocationHandleType requestedHandleTypes; ///< Requested handle types + }; + hipMemLocation location; ///< Memory location + void* win32HandleMetaData; ///< Metadata for Win32 handles + struct { + unsigned char compressionType; ///< Compression type + unsigned char gpuDirectRDMACapable; ///< RDMA capable + unsigned short usage; ///< Usage + } allocFlags; +} hipMemAllocationProp; + +/** + * External semaphore signal node parameters + */ +typedef struct hipExternalSemaphoreSignalNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external signal semaphore. + const hipExternalSemaphoreSignalParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. 
+ unsigned int numExtSems; +} hipExternalSemaphoreSignalNodeParams; + +/** + * External semaphore wait node parameters + */ +typedef struct hipExternalSemaphoreWaitNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external wait semaphore. + const hipExternalSemaphoreWaitParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. + unsigned int numExtSems; +} hipExternalSemaphoreWaitNodeParams; + +/** + * Generic handle for memory allocation + */ +typedef struct ihipMemGenericAllocationHandle* hipMemGenericAllocationHandle_t; + +/** + * Flags for granularity + */ +typedef enum hipMemAllocationGranularity_flags { + hipMemAllocationGranularityMinimum = 0x0, ///< Minimum granularity + hipMemAllocationGranularityRecommended = 0x1 ///< Recommended granularity for performance +} hipMemAllocationGranularity_flags; + +/** + * Memory handle type + */ +typedef enum hipMemHandleType { + hipMemHandleTypeGeneric = 0x0 ///< Generic handle type +} hipMemHandleType; + +/** + * Memory operation types + */ +typedef enum hipMemOperationType { + hipMemOperationTypeMap = 0x1, ///< Map operation + hipMemOperationTypeUnmap = 0x2 ///< Unmap operation +} hipMemOperationType; + +/** + * Subresource types for sparse arrays + */ +typedef enum hipArraySparseSubresourceType { + hipArraySparseSubresourceTypeSparseLevel = 0x0, ///< Sparse level + hipArraySparseSubresourceTypeMiptail = 0x1 ///< Miptail +} hipArraySparseSubresourceType; + +/** + * Map info for arrays + */ +typedef struct hipArrayMapInfo { + hipResourceType resourceType; ///< Resource type + union { + hipMipmappedArray mipmap; + hipArray_t array; + } resource; + hipArraySparseSubresourceType subresourceType; ///< Sparse subresource type + union { + struct { + unsigned int + level; ///< For mipmapped arrays must be a valid mipmap level. 
For arrays must be zero + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned int offsetX; ///< X offset in elements + unsigned int offsetY; ///< Y offset in elements + unsigned int offsetZ; ///< Z offset in elements + unsigned int extentWidth; ///< Width in elements + unsigned int extentHeight; ///< Height in elements + unsigned int extentDepth; ///< Depth in elements + } sparseLevel; + struct { + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned long long offset; ///< Offset within mip tail + unsigned long long size; ///< Extent in bytes + } miptail; + } subresource; + hipMemOperationType memOperationType; ///< Memory operation type + hipMemHandleType memHandleType; ///< Memory handle type + union { + hipMemGenericAllocationHandle_t memHandle; + } memHandle; + unsigned long long offset; ///< Offset within the memory + unsigned int deviceBitMask; ///< Device ordinal bit mask + unsigned int flags; ///< flags for future use, must be zero now. + unsigned int reserved[2]; ///< Reserved for future use, must be zero now. +} hipArrayMapInfo; + +/** + * Memcpy node params + */ +typedef struct hipMemcpyNodeParams { + int flags; ///< Must be zero. + int reserved[3]; ///< Must be zero. + hipMemcpy3DParms copyParams; ///< Params set for the memory copy. 
+} hipMemcpyNodeParams; + +/** + * Child graph node params + */ +typedef struct hipChildGraphNodeParams { + hipGraph_t graph; ///< Either the child graph to clone into the node, or + ///< a handle to the graph possessed by the node used during query +} hipChildGraphNodeParams; + +/** + * Event wait node params + */ +typedef struct hipEventWaitNodeParams { + hipEvent_t event; ///< Event to wait on +} hipEventWaitNodeParams; + +/** + * Event record node params + */ +typedef struct hipEventRecordNodeParams { + hipEvent_t event; ///< The event to be recorded when node executes +} hipEventRecordNodeParams; + +/** + * Memory free node params + */ +typedef struct hipMemFreeNodeParams { + void* dptr; ///< the pointer to be freed +} hipMemFreeNodeParams; + +/** + * Params for different graph nodes + */ +typedef struct hipGraphNodeParams { + hipGraphNodeType type; + int reserved0[3]; + union { + long long reserved1[29]; + hipKernelNodeParams kernel; + hipMemcpyNodeParams memcpy; + hipMemsetParams memset; + hipHostNodeParams host; + hipChildGraphNodeParams graph; + hipEventWaitNodeParams eventWait; + hipEventRecordNodeParams eventRecord; + hipExternalSemaphoreSignalNodeParams extSemSignal; + hipExternalSemaphoreWaitNodeParams extSemWait; + hipMemAllocNodeParams alloc; + hipMemFreeNodeParams free; + }; + + long long reserved2; +} hipGraphNodeParams; + +/** + * This port activates when the kernel has finished executing. + */ +#define hipGraphKernelNodePortDefault 0 + +/** + * This port activates when all blocks of the kernel have begun execution. + */ +#define hipGraphKernelNodePortLaunchCompletion 2 + +/** + * This port activates when all blocks of the kernel have performed + * hipTriggerProgrammaticLaunchCompletion() or have terminated. + * It must be used with edge type hipGraphDependencyTypeProgrammatic. 
+ */ +#define hipGraphKernelNodePortProgrammatic 1 + +typedef enum hipGraphDependencyType { + hipGraphDependencyTypeDefault = 0, + hipGraphDependencyTypeProgrammatic = 1 +} hipGraphDependencyType; + +typedef struct hipGraphEdgeData { + unsigned char + from_port; ///< This indicates when the dependency is triggered from the upstream node on the + ///< edge. The meaning is specific to the node type. A value of 0 in all cases + ///< means full completion of the upstream node, with memory visibility to the + ///< downstream node or portion thereof (indicated by to_port). Only kernel nodes + ///< define non-zero ports. A kernel node can use the following output port types: + ///< hipGraphKernelNodePortDefault, hipGraphKernelNodePortProgrammatic, or + ///< hipGraphKernelNodePortLaunchCompletion. + unsigned char reserved[5]; ///< These bytes are unused and must be zeroed + unsigned char + to_port; ///< Currently no node types define non-zero ports. This field must be set to zero. + unsigned char type; ///< This should be populated with a value from hipGraphDependencyType +} hipGraphEdgeData; + + +/** + * Used to specify custom attributes for launching kernels + */ +typedef struct hipLaunchAttribute_st { + hipLaunchAttributeID id; ///< Identifier of the launch attribute + char pad[8 - sizeof(hipLaunchAttributeID)]; ///< Padding to align the structure to 8 bytes + union { + hipLaunchAttributeValue val; ///< Value associated with the launch attribute + hipLaunchAttributeValue value; ///< Value associated with the launch attribute + }; +} hipLaunchAttribute; + +/** + * HIP extensible launch configuration + */ +typedef struct hipLaunchConfig_st { + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + size_t dynamicSmemBytes; ///< Dynamic shared-memory size per thread block + hipStream_t stream; ///< Stream identifier + hipLaunchAttribute* attrs; ///< Attributes list + unsigned int numAttrs; ///< Number of attributes +} hipLaunchConfig_t; + +/** + * HIP 
driver extensible launch configuration + */ +typedef struct HIP_LAUNCH_CONFIG_st { + unsigned int gridDimX; ///< Grid width in blocks + unsigned int gridDimY; ///< Grid height in blocks + unsigned int gridDimZ; ///< Grid depth in blocks + unsigned int blockDimX; ///< Thread block dimension in X + unsigned int blockDimY; ///< Thread block dimension in Y + unsigned int blockDimZ; ///< Thread block dimension in Z + unsigned int sharedMemBytes; ///< Dynamic shared-memory size in bytes per block + hipStream_t hStream; ///< HIP stream identifier + hipLaunchAttribute* attrs; ///< Attribute list + unsigned int numAttrs; ///< Number of attributes +} HIP_LAUNCH_CONFIG; + +/** + * Requested handle type for address range. + */ +typedef enum hipMemRangeHandleType { + hipMemRangeHandleTypeDmaBufFd = 0x1, + hipMemRangeHandleTypeMax = 0x7fffffff +} hipMemRangeHandleType; + +/** + * Mem Range Flags used in hipMemGetHandleForAddressRange. + */ +typedef enum hipMemRangeFlags { + hipMemRangeFlagDmaBufMappingTypePcie = 0x1, + hipMemRangeFlagsMax = 0x7fffffff +} hipMemRangeFlags; + +// Doxygen end group GlobalDefs +/** + * @} + */ +/** + * @defgroup API HIP API + * @{ + * + * Defines the HIP API. See the individual sections for more information. + */ +/** + * @defgroup Driver Initialization and Version + * @{ + * This section describes the initialization and version functions of HIP runtime API. + * + */ +/** + * @brief Explicitly initializes the HIP runtime. + * + * @param [in] flags Initialization flag, should be zero. + * + * Most HIP APIs implicitly initialize the HIP runtime. + * This API provides control over the timing of the initialization. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO-ctx - more description on error codes. +hipError_t hipInit(unsigned int flags); + +/** + * @brief Returns the approximate HIP driver version. 
+ * + * @param [out] driverVersion driver version + * + * HIP driver version shows up in the format: + * HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The HIP driver version does not correspond to an exact CUDA driver revision. + * On AMD platform, the API returns the HIP driver version, while on NVIDIA platform, it calls + * the corresponding CUDA runtime API and returns the CUDA driver version. + * There is no mapping/correlation between HIP driver version and CUDA driver version. + * + * @see hipRuntimeGetVersion + */ +hipError_t hipDriverGetVersion(int* driverVersion); +/** + * @brief Returns the approximate HIP Runtime version. + * + * @param [out] runtimeVersion HIP runtime version + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The version definition of HIP runtime is different from CUDA. + * On AMD platform, the function returns HIP runtime version, + * while on NVIDIA platform, it returns CUDA runtime version. + * And there is no mapping/correlation between HIP version and CUDA version. + * + * @see hipDriverGetVersion + */ +hipError_t hipRuntimeGetVersion(int* runtimeVersion); +/** + * @brief Returns a handle to a compute device + * @param [out] device Handle of device + * @param [in] ordinal Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); + +/** + * @brief Returns the compute capability of the device + * @param [out] major Major compute capability version number + * @param [out] minor Minor compute capability version number + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); +/** + * @brief Returns an identifier string for the device.
+ * @param [out] name String of the device name + * @param [in] len Maximum length of string to store in device name + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); +/** + * @brief Returns a UUID for the device.[BETA] + * @param [out] uuid UUID for the device + * @param [in] device device ordinal + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorDeinitialized + */ +hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device); +/** + * @brief Returns a value for attribute of link between two devices + * @param [out] value Pointer of the value for the attribute + * @param [in] attr enum of hipDeviceP2PAttr to query + * @param [in] srcDevice The source device of the link + * @param [in] dstDevice The destination device of the link + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, int srcDevice, + int dstDevice); +/** + * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. + * @param [out] pciBusId The string of PCI Bus Id format for the device + * @param [in] len Maximum length of string + * @param [in] device The device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); +/** + * @brief Returns a handle to a compute device. + * @param [out] device The handle of the device + * @param [in] pciBusId The string of PCI Bus Id for the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); +/** + * @brief Returns the total amount of memory on the device.
+ * @param [out] bytes The size of memory in bytes, on the device + * @param [in] device The ordinal of the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); +// doxygen end initialization +/** + * @} + */ +/** + * @defgroup Device Device Management + * @{ + * This section describes the device management functions of HIP runtime API. + */ +/** + * @brief Waits on all active streams on current device + * + * When this command is invoked, the host thread gets blocked until all the commands associated + * with streams associated with the device have completed. HIP does not support multiple blocking modes (yet!). + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipDeviceReset + */ +hipError_t hipDeviceSynchronize(void); +/** + * @brief The state of current device is discarded and updated to a fresh state. + * + * Calling this function deletes all streams created, memory allocated, kernels running, events + * created. Make sure that no other thread is using the device or streams, memory, kernels, events + * associated with the current device. + * + * @returns #hipSuccess + * + * @see hipDeviceSynchronize + */ +hipError_t hipDeviceReset(void); +/** + * @brief Set default device to be used for subsequent hip API calls from this thread. + * + * @param[in] deviceId Valid device in range 0...(hipGetDeviceCount()-1). + * + * Sets @p deviceId as the default device for the calling host thread. Valid device id's are 0... + * (hipGetDeviceCount()-1). + * + * Many HIP APIs implicitly use the "default device" : + * + * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be + * allocated on device. + * - Any streams or events created from this host thread will be associated with device.
+ * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device + * (unless a specific stream is specified, in which case the device associated with that stream will + * be used). + * + * This function may be called from any host thread. Multiple host threads may use the same device. + * This function does no synchronization with the previous or new device, and has very little + * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before + * making a HIP runtime call which uses the default device. + * + * The default device is stored in thread-local-storage for each thread. + * Thread-pool implementations may inherit the default device of the previous thread. A good + * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known + * standard device. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorNoDevice + * + * @see #hipGetDevice, #hipGetDeviceCount + */ +hipError_t hipSetDevice(int deviceId); +/** + * @brief Set a list of devices that can be used. + * + * @param[in] device_arr List of devices to try + * @param[in] len Number of devices in specified list + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see #hipGetDevice, #hipGetDeviceCount, #hipSetDevice, #hipGetDeviceProperties, + * #hipSetDeviceFlags, #hipChooseDevice + * + * */ +hipError_t hipSetValidDevices(int* device_arr, int len); +/** + * @brief Return the default device id for the calling host thread. + * + * @param [out] deviceId *deviceId is written with the default device + * + * HIP maintains a default device for each thread using thread-local-storage. + * This device is used implicitly for HIP runtime APIs called by this thread. + * hipGetDevice returns in * @p deviceId the default device for the calling host thread.
+ * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipSetDevice, hipGetDevicesizeBytes + */ +hipError_t hipGetDevice(int* deviceId); +/** + * @brief Return number of compute-capable devices. + * + * @param [out] count Returns number of compute-capable devices. + * + * @returns #hipSuccess, #hipErrorNoDevice + * + * + * Returns in @p *count the number of devices that have ability to run compute commands. If there + * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more + * devices can be found, then hipGetDeviceCount returns #hipSuccess. + */ +hipError_t hipGetDeviceCount(int* count); +/** + * @brief Query for a specific device attribute. + * + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); +/** + * @brief Returns the default memory pool of the specified device + * + * @param [out] mem_pool Default memory pool to return + * @param [in] device Device index for query the default memory pool + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Sets the current memory pool of a device + * + * The memory pool must be local to the specified device. + * @p hipMallocAsync allocates from the current mempool of the provided stream's device. 
+ * By default, a device's current memory pool is its default memory pool. + * + * @note Use @p hipMallocFromPoolAsync for asynchronous memory allocations from a device + * different than the one the stream runs on. + * + * @param [in] device Device index for the update + * @param [in] mem_pool Memory pool for update as the current on the specified device + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool); +/** + * @brief Gets the current memory pool for the specified device + * + * Returns the last pool provided to @p hipDeviceSetMemPool for this device + * or the device's default memory pool if @p hipDeviceSetMemPool has never been called. + * By default the current mempool is the default mempool for a device, + * otherwise the returned pool must have been set with @p hipDeviceSetMemPool. + * + * @param [out] mem_pool Current memory pool on the specified device + * @param [in] device Device index to query the current memory pool + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Returns device properties. 
+ * + * @param [out] prop written with device properties + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * @bug HIP-Clang always returns 0 for maxThreadsPerMultiProcessor + * @bug HIP-Clang always returns 0 for regsPerBlock + * @bug HIP-Clang always returns 0 for l2CacheSize + * + * Populates hipGetDeviceProperties with information for the specified device. + */ +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); +/** + * @brief Gets the maximum width for 1D linear textures on the specified device + * + * This function queries the maximum width, in elements, of 1D linear textures that can be allocated + * on the specified device. The maximum width depends on the texture element size and the hardware + * limitations of the device. + * + * @param [out] max_width Maximum width, in elements, of 1D linear textures that the device can + * support + * @param [in] desc Requested channel format + * @param [in] device Device index to query for maximum 1D texture width + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + * + * @see hipDeviceGetAttribute, hipMalloc, hipTexRefSetAddressMode + */ +hipError_t hipDeviceGetTexture1DLinearMaxWidth(size_t* max_width, const hipChannelFormatDesc* desc, + int device); +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig Cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotSupported + * + * Note: AMD devices do not support reconfigurable cache. This API is not implemented + * on AMD platform. If the function is called, it will return hipErrorNotSupported. + * + */ +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Get Cache configuration for a specific Device + * + * @param [out] cacheConfig Pointer of cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices do not support reconfigurable cache. 
This hint is ignored + * on these architectures. + * + */ +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Gets resource limits of current device + * + * The function queries the size of limit value, as required by the input enum value hipLimit_t, + * which can be either #hipLimitStackSize, or #hipLimitMallocHeapSize. Any other input as + * default, the function will return #hipErrorUnsupportedLimit. + * + * @param [out] pValue Returns the size of the limit in bytes + * @param [in] limit The limit to query + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); +/** + * @brief Sets resource limits of current device. + * + * As the input enum limit, + * #hipLimitStackSize sets the limit value of the stack size on the current GPU device, per thread. + * The limit size can be retrieved via hipDeviceGetLimit. The size is in units of 256 dwords, up to the limit + * (128K - 16). + * + * #hipLimitMallocHeapSize sets the limit value of the heap used by the malloc()/free() + * calls. For limit size, use the #hipDeviceGetLimit API. + * + * Any other input as default, the function will return hipErrorUnsupportedLimit. + * + * @param [in] limit Enum of hipLimit_t to set + * @param [in] value The size of limit value in bytes + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceSetLimit(enum hipLimit_t limit, size_t value); +/** + * @brief Returns bank width of shared memory for current device + * + * @param [out] pConfig The pointer of the bank width for shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures.
+ * + */ +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Gets the flags set for current device + * + * @param [out] flags Pointer of the flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipGetDeviceFlags(unsigned int* flags); +/** + * @brief The bank width of shared memory on current device is set + * + * @param [in] config Configuration for the bank width of shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief The current device behavior is changed according to the flags passed. + * + * @param [in] flags Flag to set on the current device + * + * The schedule flags impact how HIP waits for the completion of a command running on a device. + * + * #hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted + * the work until the command completes. This offers the lowest latency, but will consume a CPU + * core and may increase power. + * + * #hipDeviceScheduleYield : The HIP runtime will yield the CPU to system so that other + * tasks can use it. This may increase latency to detect the completion but will consume less + * power and is friendlier to other tasks in the system. + * + * #hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. + * + * #hipDeviceScheduleAuto : This is the default value if the input 'flags' is zero. + * Uses a heuristic to select between Spin and Yield modes. If the number of HIP contexts is + * greater than the number of logical processors in the system, uses Spin scheduling, otherwise + * uses Yield scheduling. + * + * #hipDeviceMapHost : Allows mapping host memory. On ROCm, this is always allowed and + * the flag is ignored. 
+ * + * #hipDeviceLmemResizeToMax : This flag is silently ignored on ROCm. + * + * @returns #hipSuccess, #hipErrorNoDevice, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess + * + * + */ +hipError_t hipSetDeviceFlags(unsigned flags); +/** + * @brief Device which matches hipDeviceProp_t is returned + * + * @param [out] device Pointer of the device + * @param [in] prop Pointer of the properties + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); +/** + * @brief Returns the link type and hop count between two devices + * + * @param [in] device1 Ordinal for device1 + * @param [in] device2 Ordinal for device2 + * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two + * devices + * @param [out] hopcount Returns the hop count between the two devices + * + * Queries and returns the HSA link type and the hop count between the two specified devices. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, + uint32_t* hopcount); +// TODO: implement IPC apis +/** + * @brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with hipMalloc and exports it for use in another process. This is a + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with hipFree and a subsequent call + * to hipMalloc returns memory with the same device address, + * hipIpcGetMemHandle will return a unique handle for the + * new memory. + * + * @param handle - Pointer to user allocated hipIpcMemHandle to return + * the handle in. 
+ * @param devPtr - Base pointer to previously allocated device memory + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorOutOfMemory, #hipErrorMapFailed + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); +/** + * @brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with hipIpcGetMemHandle into + * the current device address space. For contexts on different devices + * hipIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called hipDeviceEnablePeerAccess. This behavior is + * controlled by the hipIpcMemLazyEnablePeerAccess flag. + * hipDeviceCanAccessPeer can determine if a mapping is possible. + * + * Contexts that may open hipIpcMemHandles are restricted in the following way. + * hipIpcMemHandles from each device in a given process may only be opened + * by one context per device per other process. + * + * Memory returned from hipIpcOpenMemHandle must be freed with + * hipIpcCloseMemHandle. + * + * Calling hipFree on an exported memory region before calling + * hipIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * @param devPtr - Returned device pointer + * @param handle - hipIpcMemHandle to open + * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, + * #hipErrorInvalidDevicePointer + * + * @note During multiple processes, using the same memory handle opened by the current context, + * there is no guarantee that the same device pointer will be returned in @p *devPtr. + * This is different from CUDA. + * @note This IPC memory related feature API on Windows may behave differently from Linux.
+ * + */ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +/** + * @brief Close memory mapped with hipIpcOpenMemHandle + * + * Unmaps memory returned by hipIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. + * + * @param devPtr - Device pointer returned by hipIpcOpenMemHandle + * + * @returns #hipSuccess, #hipErrorMapFailed, #hipErrorInvalidHandle + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcCloseMemHandle(void* devPtr); + +/** + * @brief Gets an opaque interprocess handle for an event. + * + * This opaque handle may be copied into other processes and opened with hipIpcOpenEventHandle. + * Then hipEventRecord, hipEventSynchronize, hipStreamWaitEvent and hipEventQuery may be used in + * either process. Operations on the imported event after the exported event has been freed with + * hipEventDestroy will result in undefined behavior. + * + * @param[out] handle Pointer to hipIpcEventHandle to return the opaque event handle + * @param[in] event Event allocated with hipEventInterprocess and hipEventDisableTiming flags + * + * @returns #hipSuccess, #hipErrorInvalidConfiguration, #hipErrorInvalidValue + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); + +/** + * @brief Opens an interprocess event handle. + * + * Opens an interprocess event handle exported from another process with hipIpcGetEventHandle. The + * returned hipEvent_t behaves like a locally created event with the hipEventDisableTiming flag + * specified. This event needs to be freed with hipEventDestroy.
Operations on the imported event after + * the exported event has been freed with hipEventDestroy will result in undefined behavior. If the + * function is called within the same process where handle is returned by hipIpcGetEventHandle, it + * will return hipErrorInvalidContext. + * + * @param[out] event Pointer to hipEvent_t to return the event + * @param[in] handle The opaque interprocess handle to open + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); + +// end doxygen Device +/** + * @} + */ +/** + * + * @defgroup Execution Execution Control + * @{ + * This section describes the execution control functions of HIP runtime API. + * + */ +/** + * @brief Set attribute for a specific function + * + * @param [in] func Pointer of the function + * @param [in] attr Attribute to set + * @param [in] value Value to set + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value); +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] func Pointer of the function. + * @param [in] config Configuration to set. + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. 
+ * + */ +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); +/** + * @brief Set shared memory configuration for a specific function + * + * @param [in] func Pointer of the function + * @param [in] config Configuration + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config); +// doxygen end execution +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Error Error Handling + * @{ + * This section describes the error handling functions of HIP runtime API. + */ +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipExtGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call.
+ * + * @returns #hipSuccess + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread. Unlike hipGetLastError, this function does not reset the saved error code. + * + * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t + */ +hipError_t hipPeekAtLastError(void); +/** + * @brief Return hip error as text string form. + * + * @param hip_error Error code to convert to name. + * @returns const char pointer to the NULL-terminated error name + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorName(hipError_t hip_error); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param hipError Error code to convert to string. + * @returns const char pointer to the NULL-terminated error string + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorString(hipError_t hipError); +/** + * @brief Return hip error as text string form. + * + * @param [in] hipError Error code to convert to string. + * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorName(hipError_t hipError, const char** errorString); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param [in] hipError Error code to convert to string.
+ * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeakAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString); +// end doxygen Error +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Stream Management + * @{ + * This section describes the stream management functions of HIP runtime API. + * The following Stream APIs are not (yet) supported in HIP: + * - hipStreamAttachMemAsync is a nop + * - hipDeviceGetStreamPriorityRange returns #hipSuccess + */ + +/** + * @brief Creates an asynchronous stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device. The @p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, the application must call + * hipStreamDestroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreate(hipStream_t* stream); +/** + * @brief Creates an asynchronous stream with flag. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device. 
@p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle + * goes out-of-scope. To release the memory used by the stream, application must call + * hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, + * hipStreamDestroy. + * + */ +hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); +/** + * @brief Creates an asynchronous stream with the specified priority. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @param[in] priority Priority of the stream. Lower numbers represent higher priorities. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified priority, with its associated current + * device. + * @p stream returns an opaque handle that can be used to reference the newly created stream in + * subsequent hipStream* commands. The stream is allocated on the heap and will remain allocated + * even if the handle goes out-of-scope. To release the memory used by the stream, application must + * call hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); +/** + * @brief Returns numerical values that correspond to the least and greatest stream priority. + * + * @param[in, out] leastPriority Pointer in which a value corresponding to least priority + * is returned. 
+ * @param[in, out] greatestPriority Pointer in which a value corresponding to greatest priority + * is returned. + * @returns #hipSuccess + * + * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the + * least and greatest stream priority respectively. Stream priorities follow a convention where + * lower numbers imply greater priorities. The range of meaningful stream priorities is given by + * [*leastPriority,*greatestPriority]. If the user attempts to create a stream with a priority + * value that is outside the meaningful range as specified by this API, the priority is + * automatically clamped to within the valid range. + * + * @warning This API is under development on AMD GPUs and simply returns #hipSuccess. + */ +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); +/** + * @brief Destroys the specified stream. + * + * @param[in] stream Stream identifier + * @returns #hipSuccess #hipErrorInvalidHandle + * + * Destroys the specified stream. + * + * If commands are still executing on the specified stream, some may complete execution before the + * queue is deleted. + * + * The queue may be destroyed while some commands are still inflight, or may wait for all commands + * queued to the stream before destroying it. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, + * hipStreamWaitEvent, hipStreamSynchronize + */ +hipError_t hipStreamDestroy(hipStream_t stream); +/** + * @brief Returns #hipSuccess if all of the operations in the specified @p stream have completed, or + * #hipErrorNotReady if not. + * + * @param[in] stream Stream to query + * + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other + * host threads are sending work to the stream, the status may change immediately after the function + * is called. 
It is typically used for debug. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamQuery(hipStream_t stream); +/** + * @brief Waits for all commands in the stream to complete. + * + * @param[in] stream Stream identifier. + * + * @returns #hipSuccess, #hipErrorInvalidHandle + * + * This command is host-synchronous : the host will block until all operations on the specified + * stream with its associated device are completed. On multiple device systems, the @p stream is + * associated with its device, no need to call hipSetDevice before this API. + * + * This command follows standard null-stream semantics. Specifying the null stream will cause the + * command to wait for other streams on the same device to complete all pending operations. + * + * This command honors the #hipDeviceScheduleBlockingSync flag, which controls whether the wait is + * active or blocking. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamDestroy + * + */ +hipError_t hipStreamSynchronize(hipStream_t stream); +/** + * @brief Makes the specified compute stream wait for the specified event + * + * @param[in] stream Stream to make wait + * @param[in] event Event to wait on + * @param[in] flags Parameters to control the operation + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorStreamCaptureIsolation + * + * This function inserts a wait operation into the specified stream. + * All future work submitted to @p stream will wait until @p event reports completion before + * beginning execution. + * + * Flags include: + * hipEventWaitDefault: Default event creation flag. + * hipEventWaitExternal: Wait is captured in the graph as an external event node when + * performing stream capture + * + * This function only waits for commands in the current stream to complete. 
Notably, this function + * does not implicitly wait for commands in the default stream to complete, even if the specified + * stream is created with hipStreamNonBlocking = 0. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags __dparm(0)); +/** + * @brief Returns flags associated with this stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); +/** + * @brief Queries the Id of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] streamId Pointer to an unsigned long long in which the stream's id is returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags, hipStreamGetFlags, hipStreamCreateWithPriority, hipStreamGetPriority + */ +hipError_t hipStreamGetId(hipStream_t stream, unsigned long long* streamId); +/** + * @brief Queries the priority of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] priority Pointer to an integer in which the stream's priority is + * returned + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithPriority + */ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); +/** + * @brief Gets the device associated with the stream.
+ * + * @param[in] stream Stream to be queried + * @param[out] device Device associated with the stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorContextIsDestroyed, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorDeinitialized, #hipErrorInvalidContext + * + * @see hipStreamCreate, hipStreamDestroy, hipDeviceGetStreamPriorityRange + */ +hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device); +/** + * @brief Creates an asynchronous stream with the specified CU mask. + * + * @param[in, out] stream Pointer to new stream + * @param[in] cuMaskSize Size of CU mask bit array passed in. + * @param[in] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU. + * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical + * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored. + * It is user's responsibility to make sure the input is meaningful. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified CU mask. @p stream returns an opaque + * handle that can be used to reference the newly created stream in subsequent hipStream* commands. + * The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, application must call hipStreamDestroy. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, + const uint32_t* cuMask); +/** + * @brief Gets CU mask associated with an asynchronous stream + * + * @param[in] stream Stream to be queried + * @param[in] cuMaskSize Number of the block of memories (uint32_t *) allocated by user + * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which + * the stream's CU mask is returned. 
The CU mask is returned in chunks of 32 bits where + * each active bit represents one active CU. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask); +/** + * Stream callback function pointer type + */ +typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); +/** + * @brief Adds a callback to be called on the host after all currently enqueued items in the stream + * have completed. For each hipStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); + +/** + * @brief Sets stream attribute. Updated attribute is applied to work submitted to the stream. + * @param[in] stream - Stream to set attributes to + * @param[in] attr - Attribute ID for the attribute to set + * @param[in] value - Attribute value for the attribute to set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue* value); + +/** + * @brief Queries a stream attribute.
+ * @param[in] stream - Stream to get attributes from + * @param[in] attr - Attribute ID for the attribute to query + * @param[out] value_out - Attribute value output + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue* value_out); + +/** + * @brief Copies attributes from source stream to destination stream. + * @param[in] dst - Destination stream + * @param[in] src - Source stream + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipStreamCopyAttributes(hipStream_t dst, hipStream_t src); + +// end doxygen Stream +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamM Stream Memory Operations + * @{ + * This section describes Stream Memory Wait and Write functions of HIP runtime API. + */ + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using #hipMallocSignalMemory flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte, + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor + * @param [in] mask - Mask to be applied on value at memory before it is compared with value, + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream; all operations enqueued on this stream after it will + * not execute until the defined wait condition is true.
+ * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'. + * + * @note Support for #hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)); + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor. + * @param [in] mask - Mask to be applied on value at memory before it is compared with value + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. 
+ * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'. + * + * @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); + +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags); +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags); + +/** + * @brief Enqueues an array of stream memory operations in the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] count - The number of operations in the array. Must be less than 256 + * @param [in] paramArray - The types and parameters of the individual operations. + * @param [in] flags - Reserved for future expansion; must be 0. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Batch operations to synchronize the stream via memory operations. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. 
hipStreamWriteValue64 + */ + +hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, + hipStreamBatchMemOpParams* paramArray, unsigned int flags); + +/** + * @brief Creates a batch memory operation node and adds it to a graph.[BETA] + * + * @param [in] phGraphNode - Returns the newly created node + * @param [in] hGraph - Graph to which to add the node + * @param [in] dependencies - Dependencies of the node + * @param [in] numDependencies - Number of dependencies + * @param [in] nodeParams - Parameters for the node + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Returns a batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to get the parameters for + * @param [in] nodeParams_out - Pointer to return the parameters + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Returns the parameters of batch mem op node hNode in nodeParams_out. + * The paramArray returned in nodeParams_out is owned by the node. + * This memory remains valid until the node is destroyed or its parameters are modified, + * and should not be modified directly. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. hipStreamWriteValue64. 
hipGraphBatchMemOpNodeSetParams + */ + +hipError_t hipGraphBatchMemOpNodeGetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams_out); + +/** + * @brief Sets the batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to set the parameters for + * @param [in] nodeParams - Parameters to copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of batch mem op node hNode to nodeParams. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. hipStreamWriteValue64, hipGraphBatchMemOpNodeGetParams + */ + +hipError_t hipGraphBatchMemOpNodeSetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Sets the parameters for a batch mem op node in the given graphExec.[BETA] + * + * @param [in] hGraphExec - The executable graph in which to set the specified node + * @param [in] hNode - Batch mem op node from the graph from which graphExec was instantiated + * @param [in] nodeParams - Updated Parameters to set + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of a batch mem op node in an executable graph hGraphExec. + * The node is identified by the corresponding node hNode in the non-executable graph, + * from which the executable graph was instantiated. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. 
hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphExecBatchMemOpNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipBatchMemOpNodeParams* nodeParams); + +// end doxygen Stream Memory Operations +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Event Event Management + * @{ + * This section describes the event management functions of HIP runtime API. + */ +/** + * @brief Create an event with the specified flags + * + * @param[in,out] event Returns the newly created event. + * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, + #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess, #hipEventDisableSystemFence + * #hipEventDefault : Default flag. The event will use active synchronization and will support + timing. Active synchronization provides lowest possible latency at the expense of dedicating a + CPU to poll on the event. + * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is + called on this event, the thread will block until the event completes. This can increase latency + for the synchronization but can result in lower power and more resources for other CPU threads. + * #hipEventDisableTiming : Disable recording of timing information. Events created with this flag + would not record profiling data and provide best performance if used for synchronization. + * #hipEventInterprocess : The event can be used as an interprocess event. hipEventDisableTiming + flag also must be set when hipEventInterprocess flag is set. + * #hipEventDisableSystemFence : Disable acquire and release system scope fence. This may + improve performance but device memory may not be visible to the host and other devices + if this flag is set.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); +/** + * Create an event + * + * @param[in,out] event Returns the newly created event. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreate(hipEvent_t* event); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @param[in] flags parameter for operations + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition to + * from recording to "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * Flags include: + * hipEventRecordDefault: Default event creation flag. + * hipEventRecordExternal: Event is captured in the graph as an external event node when + * performing stream capture + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. 
+ * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note If this function is not called before using hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream __dparm(0), + unsigned int flags __dparm(0)); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition + * from the "recording" to the "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note If this function is not called before using hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream.
+ * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +#ifdef __cplusplus +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); +#endif +/** + * @brief Destroy the specified event. + * + * @param[in] event Event to destroy. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure + * + * Releases memory associated with the event. If the event is recording but has not completed + * recording when hipEventDestroy() is called, the function will return immediately and the + * completion_future resources will be released later, when the hipDevice is synchronized. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, + * hipEventElapsedTime + * + * @returns #hipSuccess + */ +hipError_t hipEventDestroy(hipEvent_t event); +/** + * @brief Wait for an event to complete. + * + * This function will block until the event is ready, waiting for all previous work in the stream + * specified when event was recorded with hipEventRecord(). + * + * If hipEventRecord() has not been called on @p event, this function returns #hipSuccess when no + * event is captured. + * + * + * @param[in] event Event on which to wait. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventElapsedTime + */ +hipError_t hipEventSynchronize(hipEvent_t event); +/** + * @brief Return the elapsed time between two events. + * + * @param[out] ms : Return time between start and stop in ms. + * @param[in] start : Start event. + * @param[in] stop : Stop event. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Computes the elapsed time between two events. Time is computed in ms, with + * a resolution of approximately 1 us. + * + * Events which are recorded in a NULL stream will block until all commands + * on all other streams complete execution, and then record the timestamp. + * + * Events which are recorded in a non-NULL stream will record their timestamp + * when they reach the head of the specified stream, after all previous + * commands in that stream have completed executing. Thus the time that + * the event recorded may be significantly after the host calls hipEventRecord(). + * + * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is + * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been + * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at + * least one of the events), then #hipErrorNotReady is returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventSynchronize + */ +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); +/** + * @brief Query event status + * + * @param[in] event Event to query. + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Query the status of the specified event. This function will return #hipSuccess if all + * commands in the appropriate stream (specified to hipEventRecord()) have completed. If any + * execution has not completed, then #hipErrorNotReady is returned. + * + * @note This API returns #hipSuccess, if hipEventRecord() is not called before this API. 
+ * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, + * hipEventSynchronize, hipEventElapsedTime + */ +hipError_t hipEventQuery(hipEvent_t event); +// end doxygen Events +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Memory Management + * @{ + * This section describes the memory management functions of HIP runtime API. + * The following CUDA APIs are not currently supported: + * - cudaMalloc3D + * - cudaMalloc3DArray + * - TODO - more 2D, 3D, array APIs here. + * + * + */ + +/** + * @brief Sets information on the specified pointer.[BETA] + * + * @param [in] value Sets pointer attribute value + * @param [in] attribute Attribute to set + * @param [in] ptr Pointer to set attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + */ +hipError_t hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr); + + +/** + * @brief Returns attributes for the specified pointer + * + * @param [out] attributes attributes for the specified pointer + * @param [in] ptr pointer to get attributes for + * + * The output parameter 'attributes' has a member named 'type' that describes what memory the + * pointer is associated with, such as device memory, host memory, managed memory, and others. + * Otherwise, the API cannot handle the pointer and returns #hipErrorInvalidValue. + * + * @note The unrecognized memory type is unsupported to keep the HIP functionality backward + * compatibility due to #hipMemoryType enum values. 
+ * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @note The current behavior of this HIP API corresponds to the CUDA API before version 11.0. + * + * @see hipPointerGetAttribute + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in, out] data Returned pointer attribute value + * @param [in] attribute Attribute to query for + * @param [in] ptr Pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipPointerGetAttributes + */ +hipError_t hipPointerGetAttribute(void* data, hipPointer_attribute attribute, hipDeviceptr_t ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in] numAttributes number of attributes to query for + * @param [in] attributes attributes to query for + * @param [in, out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] ptr pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues.
+ * + * @see hipPointerGetAttribute + */ +hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, hipPointer_attribute* attributes, + void** data, hipDeviceptr_t ptr); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup External External Resource Interoperability + * @{ + * @ingroup API + * + * This section describes the external resource interoperability functions of HIP runtime API. + * + */ +/** + * @brief Imports an external semaphore. + * + * @param[out] extSem_out Returned handle to an external semaphore + * @param[in] semHandleDesc Semaphore import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc); +/** + * @brief Signals a set of external semaphore objects. + * + * @param[in] extSemArray External semaphores to be signaled + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to signal + * @param[in] stream Stream to enqueue the signal operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux.
+ * + */ +hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Waits on a set of external semaphore objects + * + * @param[in] extSemArray External semaphores to be waited on + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to wait on + * @param[in] stream Stream to enqueue the wait operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Destroys an external semaphore object and releases any references to the underlying + * resource. Any outstanding signals or waits must have completed before the semaphore is destroyed. + * + * @param[in] extSem handle to an external semaphore object + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem); + +/** + * @brief Imports an external memory object. + * + * @param[out] extMem_out Returned handle to an external memory object + * @param[in] memHandleDesc Memory import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + */ +hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, + const hipExternalMemoryHandleDesc* memHandleDesc); +/** + * @brief Maps a buffer onto an imported memory object.
+ * + * @param[out] devPtr Returned device pointer to buffer + * @param[in] extMem Handle to external memory object + * @param[in] bufferDesc Buffer descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipExternalMemoryGetMappedBuffer(void** devPtr, hipExternalMemory_t extMem, + const hipExternalMemoryBufferDesc* bufferDesc); +/** + * @brief Destroys an external memory object. + * + * @param[in] extMem External memory object to be destroyed + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem); +/** + * @brief Maps a mipmapped array onto an external memory object. + * + * @param[out] mipmap mipmapped array to return + * @param[in] extMem external memory object handle + * @param[in] mipmapDesc external mipmapped array descriptor + * + * Returned mipmapped array must be freed using hipFreeMipmappedArray. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + * + * @see hipImportExternalMemory, hipDestroyExternalMemory, hipExternalMemoryGetMappedBuffer, + * hipFreeMipmappedArray + */ +hipError_t hipExternalMemoryGetMappedMipmappedArray( + hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem, + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc); +// end of external resource +/** + * @} + */ +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipMalloc(void** ptr, size_t size); +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] sizeBytes Requested memory size + * @param[in] flags Type of memory allocation + * + * If requested memory size is 0, no memory is allocated, *ptr returns nullptr, and #hipSuccess + * is returned. + * + * The memory allocation flag should be either #hipDeviceMallocDefault, + * #hipDeviceMallocFinegrained, #hipDeviceMallocUncached, or #hipMallocSignalMemory. + * If the flag is any other value, the API returns #hipErrorInvalidValue. + * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags); + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryD Memory Management [Deprecated] + * @ingroup Memory + * @{ + * This section describes the deprecated memory management functions of HIP runtime API. + * + */ + +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMallocHost(void** ptr, size_t size); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMemAllocHost(void** ptr, size_t size); +// end doxygen deprecated management memory +/** + * @} + */ +/** + * @brief Allocates device accessible page locked (pinned) host memory + * + * This API allocates pinned host memory which is mapped into the address space of all GPUs + * in the system, the memory can be accessed directly by the GPU device, and can be read or + * written with much higher bandwidth than pageable memory obtained with functions such as + * malloc(). + * + * Using the pinned host memory, applications can implement faster data transfers for HostToDevice + * and DeviceToHost. The runtime tracks the hipHostMalloc allocations and can avoid some of the + * setup required for regular unpinned memory. + * + * When the memory accesses are infrequent, zero-copy memory can be a good choice, for coherent + * allocation. GPU can directly access the host memory over the CPU/GPU interconnect, without need + * to copy the data. + * + * Currently the allocation granularity is 4KB for the API. + * + * Developers need to choose proper allocation flag with consideration of synchronization. + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * @param[in] flags Type of host memory allocation. See the description of flags in + * hipSetDeviceFlags. + * + * If no input for flags, it will be the default pinned memory allocation on the host. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * + * @see hipSetDeviceFlags, hipHostFree + */ +hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryM Managed Memory + * + * @ingroup Memory + * @{ + * This section describes the managed memory management functions of HIP runtime API. + * + * @note The managed memory management APIs are implemented on Linux, under development + * on Windows. + * + */ +/** + * @brief Allocates memory that will be automatically managed by HIP. + * + * This API is used for managed memory, allows data to be shared and accessible to both CPU and + * GPU using a single pointer. + * + * The API returns the allocation pointer, managed by HMM, can be used further to execute kernels + * on device and fetch data between the host and device as needed. + * + * If HMM is not supported, the function behaves the same as @p hipMallocHost . + * + * @note It is recommended to do the capability check before calling this API. + * + * @param [out] dev_ptr - pointer to allocated device memory + * @param [in] size - requested allocation size in bytes, it should be granularity of 4KB + * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost + * (defaults to hipMemAttachGlobal) + * + * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipMallocManaged(void** dev_ptr, size_t size, + unsigned int flags __dparm(hipMemAttachGlobal)); +/** + * @brief Prefetches memory to the specified destination device using HIP.
+ * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] device destination device to prefetch to + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, + hipStream_t stream __dparm(0)); +/** + * @brief Prefetches memory to the specified destination device using HIP. + * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] location destination location to prefetch to + * @param [in] flags flags for future use, must be zero now. + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync_v2(const void* dev_ptr, size_t count, hipMemLocation location, + unsigned int flags, hipStream_t stream __dparm(0)); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] device device to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes.
+ * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, int device); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] location location to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes. + * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise_v2(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + hipMemLocation location); +/** + * @brief Query an attribute of a given memory range in HIP.
+ * + * @param [in,out] data a pointer to a memory location where the result of each + * attribute query will be written to + * @param [in] data_size the size of data + * @param [in] attribute the attribute to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, hipMemRangeAttribute attribute, + const void* dev_ptr, size_t count); +/** + * @brief Query attributes of a given memory range in HIP. + * + * @param [in,out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] data_sizes an array, containing the sizes of each result + * @param [in] attributes an array of attributes to query (num_attributes and the number + * of attributes in this array should match) + * @param [in] num_attributes the number of attributes to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, size_t num_attributes, + const void* dev_ptr, size_t count); +/** + * @brief Attach memory to a stream asynchronously in HIP.
+ * + * @param [in] stream - stream in which to enqueue the attach operation + * @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or + * to a valid host-accessible region of system-allocated memory) + * @param [in] length - length of memory (defaults to zero) + * @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or + * hipMemAttachSingle (defaults to hipMemAttachSingle) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is under development. Currently it is a no-operation (NOP) + * function on AMD GPUs and returns #hipSuccess. + */ +hipError_t hipStreamAttachMemAsync(hipStream_t stream, void* dev_ptr, size_t length __dparm(0), + unsigned int flags __dparm(hipMemAttachSingle)); +// end doxygen Managed Memory +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamO Stream Ordered Memory Allocator + * @{ + * @ingroup Memory + * This section describes Stream Ordered Memory Allocator functions of HIP runtime API. + * + * The asynchronous allocator allows the user to allocate and free in stream order. + * All asynchronous accesses of the allocation must happen between the stream executions of + * the allocation and the free. If the memory is accessed outside of the promised stream order, + * a use before allocation / use after free error will cause undefined behavior. + * + * The allocator is free to reallocate the memory as long as it can guarantee that compliant memory + * accesses will not overlap temporally. The allocator may refer to internal stream ordering as well + * as inter-stream dependencies (such as HIP events and null stream dependencies) when establishing + * the temporal guarantee. The allocator may also insert inter-stream dependencies to establish + * the temporal guarantee. 
Whether or not a device supports the integrated stream ordered memory + * allocator may be queried by calling @p hipDeviceGetAttribute with the device attribute + * @p hipDeviceAttributeMemoryPoolsSupported + * + * @note APIs in this section are implemented on Linux, under development on Windows. + */ + +/** + * @brief Allocates memory with stream ordered semantics + * + * Inserts a memory allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in *dptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the memory pool associated with the stream's device. + * + * @note The default memory pool of a device contains device memory from that device. + * @note Basic stream ordering allows future work submitted into the same stream to use the + * allocation. Stream query, stream synchronize, and HIP events can be used to guarantee that + * the allocation operation completes before work submitted in a separate stream runs. + * @note During stream capture, this function results in the creation of an allocation node. + * In this case, the allocation is owned by the graph instead of the memory pool. The memory + * pool's properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer of memory allocation + * @param [in] size Number of bytes to allocate + * @param [in] stream The stream establishing the stream ordering contract and + * the memory pool to allocate from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocFromPoolAsync, hipFreeAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream); +/** + * @brief Frees memory with stream ordered semantics + * + * Inserts a free operation into @p stream. + * The allocation must not be used after stream execution reaches the free. + * After this API returns, accessing the memory from any subsequent work launched on the GPU + * or querying its pointer attributes results in undefined behavior. + * + * @note During stream capture, this function results in the creation of a free node and + * must therefore be passed the address of a graph allocation. + * + * @param [in] dev_ptr Pointer to device memory to free + * @param [in] stream The stream, where the destruction will occur according to the execution order + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream); +/** + * @brief Releases freed memory back to the OS + * + * Releases memory back to the OS until the pool contains fewer than @p min_bytes_to_hold + * reserved bytes, or there is no more memory that the allocator can safely release. + * The allocator cannot release OS allocations that back outstanding asynchronous allocations. + * The OS allocations may happen at different granularity from the user allocations. + * + * @note Allocations that have not been freed count as outstanding.
+ * @note Allocations that have been asynchronously freed but whose completion has + * not been observed on the host (eg. by a synchronize) can count as outstanding. + * + * @param[in] mem_pool The memory pool to trim allocations + * @param[in] min_bytes_to_hold If the pool has less than min_bytes_to_hold reserved, + * then the TrimTo operation is a no-op. Otherwise the memory pool will contain + * at least min_bytes_to_hold bytes reserved after the operation. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold); +/** + * @brief Sets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to modify + * @param [in] attr The attribute to modify + * @param [in] value Pointer to the value to assign + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Gets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to get attributes of + * @param [in] attr The attribute to get + * @param [out] value Retrieved value + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Controls visibility of the specified pool between devices + * + * @param [in] mem_pool Memory pool for access change + * @param [in] desc_list Array of access descriptors. Each descriptor instructs the access to + * enable for a single gpu + * @param [in] count Number of descriptors in the map array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows.
+ */ +hipError_t hipMemPoolSetAccess(hipMemPool_t mem_pool, const hipMemAccessDesc* desc_list, + size_t count); +/** + * @brief Returns the accessibility of a pool from a device + * + * Returns the accessibility of the pool's memory from the specified location. + * + * @param [out] flags Accessibility of the memory pool from the specified location/device + * @param [in] mem_pool Memory pool being queried + * @param [in] location Location/device for memory pool access + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAccess(hipMemAccessFlags* flags, hipMemPool_t mem_pool, + hipMemLocation* location); +/** + * @brief Creates a memory pool + * + * Creates a HIP memory pool and returns the handle in @p mem_pool. The @p pool_props determines + * the properties of the pool such as the backing device and IPC capabilities. + * + * By default, the memory pool will be accessible from the device it is allocated on. + * + * @param [out] mem_pool Contains the created memory pool + * @param [in] pool_props Memory pool properties + * + * @note Specifying hipMemHandleTypeNone creates a memory pool that will not support IPC. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolDestroy, hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues.
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props); +/** + * @brief Destroys the specified memory pool + * + * If any pointers obtained from this pool haven't been freed or + * the pool has free operations that haven't completed + * when @p hipMemPoolDestroy is invoked, the function will return immediately and the + * resources associated with the pool will be released automatically + * once there are no more outstanding allocations. + * + * Destroying the current mempool of a device sets the default mempool of + * that device as the current mempool for that device. + * + * @param [in] mem_pool Memory pool for destruction + * + * @note A device's default memory pool cannot be destroyed. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolCreate hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool); +/** + * @brief Allocates memory from a specified pool with stream ordered semantics. + * + * Inserts an allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in @p dev_ptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the specified memory pool. + * + * @note The specified memory pool may be from a device different than that of the specified @p + * stream. + * + * Basic stream ordering allows future work submitted into the same stream to use the allocation. 
+ * Stream query, stream synchronize, and HIP events can be used to guarantee that the allocation + * operation completes before work submitted in a separate stream runs. + * + * @note During stream capture, this function results in the creation of an allocation node. In this + * case, the allocation is owned by the graph instead of the memory pool. The memory pool's + * properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer + * @param [in] size Number of bytes to allocate + * @param [in] mem_pool The pool to allocate from + * @param [in] stream The stream establishing the stream ordering semantic + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, hipMemPoolCreate + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess, + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocFromPoolAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream); +/** + * @brief Exports a memory pool to the requested handle type. + * + * Given an IPC capable mempool, create an OS handle to share the pool with another process. + * A recipient process can convert the shareable handle into a mempool with @p + * hipMemPoolImportFromShareableHandle. Individual pointers can then be shared with the @p + * hipMemPoolExportPointer and @p hipMemPoolImportPointer APIs. The implementation of what the + * shareable handle is and how it can be transferred is defined by the requested handle type. + * + * @note To create an IPC capable mempool, create a mempool with a @p hipMemAllocationHandleType + * other than @p hipMemHandleTypeNone. 
+ * + * @param [out] shared_handle Pointer to the location in which to store the requested handle + * @param [in] mem_pool Pool to export + * @param [in] handle_type The type of handle to create + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportFromShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Imports a memory pool from a shared handle. + * + * Specific allocations can be imported from the imported pool with @p hipMemPoolImportPointer. + * + * @note Imported memory pools do not support creating new allocations. + * As such imported memory pools may not be used in @p hipDeviceSetMemPool + * or @p hipMallocFromPoolAsync calls. + * + * @param [out] mem_pool Returned memory pool + * @param [in] shared_handle OS handle of the pool to open + * @param [in] handle_type The type of handle being imported + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolExportToShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportFromShareableHandle(hipMemPool_t* mem_pool, void* shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Export data to share a memory pool allocation between processes. + * + * Constructs @p export_data for sharing a specific allocation from an already shared memory pool. 
+ * The recipient process can import the allocation with the @p hipMemPoolImportPointer api. + * The data is not a handle and may be shared through any IPC mechanism. + * + * @param[out] export_data Returned export data + * @param[in] dev_ptr Pointer to memory being exported + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportPointer + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* dev_ptr); +/** + * @brief Import a memory pool allocation from another process. + * + * Returns in @p dev_ptr a pointer to the imported memory. + * The imported memory must not be accessed before the allocation operation completes + * in the exporting process. The imported memory must be freed from all importing processes before + * being freed in the exporting process. The pointer may be freed with @p hipFree + * or @p hipFreeAsync. If @p hipFreeAsync is used, the free must be completed + * on the importing process before the free operation on the exporting process. + * + * @note The @p hipFreeAsync api may be used in the exporting process before + * the @p hipFreeAsync operation completes in its stream as long as the + * @p hipFreeAsync in the exporting process specifies a stream with + * a stream dependency on the importing process's @p hipFreeAsync. + * + * @param [out] dev_ptr Pointer to imported memory + * @param [in] mem_pool Memory pool from which to import a pointer + * @param [in] export_data Data specifying the memory to import + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, #hipErrorOutOfMemory + * + * @see hipMemPoolExportPointer + * + * @warning This API is marked as Beta. 
While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportPointer(void** dev_ptr, hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data); +/** + * @brief Sets memory pool for memory location and allocation type. + * + * + */ +hipError_t hipMemSetMemPool(hipMemLocation* location, hipMemAllocationType type, hipMemPool_t pool); +/** + * @brief Retrieves memory pool for memory location and allocation type. + * + * + */ +hipError_t hipMemGetMemPool(hipMemPool_t* pool, hipMemLocation* location, + hipMemAllocationType type); +// Doxygen end of ordered memory allocator +/** + * @} + */ + +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * @param[in] flags Type of host memory allocation see below + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * Flags: + * - #hipHostAllocDefault Default pinned memory allocation on the host. + * - #hipHostAllocPortable Memory is considered allocated by all contexts. + * - #hipHostAllocMapped Map the allocation into the address space for the current device. + * - #hipHostAllocWriteCombined Allocates the memory as write-combined. 
+ * - #hipHostAllocUncached Allocate the host memory on extended fine grained access system + * memory pool + * + * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue + */ +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc + * + * @param[out] devPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostMalloc + * @param[in] flags Flags to be passed for extension + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipSetDeviceFlags, hipHostMalloc + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); +/** + * @brief Return flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostMalloc + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipHostMalloc + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); +/** + * @brief Register host memory so it can be accessed from the current device. + * + * @param[out] hostPtr Pointer to host memory to be registered. + * @param[in] sizeBytes Size of the host memory + * @param[in] flags See below. + * + * Flags: + * - #hipHostRegisterDefault Memory is Mapped and Portable + * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports + * one context so this is always assumed true. + * - #hipHostRegisterMapped Map the allocation into the address space for the current device. + * The device pointer can be obtained with #hipHostGetDevicePointer. + * - #hipExtHostRegisterUncached Map the host memory onto extended fine grained access system + * memory pool. + * + * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer. 
+ * On many systems, the mapped device pointer will have a different value than the mapped host + * pointer. Applications must use the device pointer in device code, and the host pointer in host + * code. + * + * On some systems, registered memory is pinned. On some systems, registered memory may not + * actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory. + * + * Developers are strongly encouraged to register memory blocks which are aligned to the host + * cache-line size. (typically 64-bytes but can be obtained from the CPUID instruction). + * + * If registering non-aligned pointers, the application must take care when registering pointers from + * the same cache line on different devices. HIP's coarse-grained synchronization model does not + * guarantee correct results if different devices write to different parts of the same cache block - + * typically one of the writes will "win" and overwrite data from the other registered memory + * region. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +/** + * @brief Un-register host pointer + * + * @param[in] hostPtr Host pointer previously registered with #hipHostRegister + * @returns Error code + * + * @see hipHostRegister + */ +hipError_t hipHostUnregister(void* hostPtr); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch.
+ * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] dptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] widthInBytes Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * @param[in] elementSizeBytes The size of element bytes, should be 4, 8 or 16 + * + * If size is 0, no memory is allocated, *dptr returns nullptr, and hipSuccess is returned. + * The intended usage of pitch is as a separate parameter of the allocation, used to compute + * addresses within the 2D array. Given the row and column of an array element of type T, the + * address is computed as: T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, + unsigned int elementSizeBytes); +/** + * @brief Free memory allocated by the HIP-Clang hip memory allocation API.
+ * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess + * @returns #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated + * with hipHostMalloc) + * + * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipFree(void* ptr); +/** + * @brief Frees page-locked memory + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated + * with hipMalloc) + * + */ +hipError_t hipFreeHost(void* ptr); +/** + * @brief Free memory allocated by the HIP-Clang hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @ingroup MemoryD + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + * hipMalloc) + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + * + */ +hipError_t hipHostFree(void* ptr); +/** + * @brief Copy data from src to dst. + * + * It supports memory from host to device, + * device to host, device to device and host to host + * The src and dst must not overlap. + * + * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. 
For optimal peer-to-peer copies, the copy + * device must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with + * copy agent as the current device and src/dst as the peerDevice argument. if this is not done, + * the hipMemcpy will still work, but will perform the copy using a staging buffer on the host. + * Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in + * undefined behavior. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); +/** + * @brief Memory copy on the stream. + * It allows single or multiple devices to do memory copy on single or multiple streams. + * The operation is akin to hipMemcpyAsync + hipStreamSynchronize. + * Since it is a sync API, it is not allowed during graph capture. 
+ * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @param[in] stream Valid stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + * @see hipMemcpy, hipStreamCreate, hipStreamSynchronize, hipStreamDestroy, hipSetDevice, + * hipLaunchKernelGGL + * + */ +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream); +/** + * @brief Copy data from Host to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, const void* src, size_t sizeBytes); +/** + * @brief Copy data from Device to Host + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, 
hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copy data from Device to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copies from one 1D array to device memory. 
+ * + * @param[out] dstDevice Destination device pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount); +/** + * @brief Copies from device memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcDevice Source device pointer + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, + size_t ByteCount); + +/** + * @brief Copies from one 1D array to another. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray, + size_t srcOffset, size_t ByteCount); +/** + * @brief Copy data from Host to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, const void* src, size_t sizeBytes, + hipStream_t stream); 
+/** + * @brief Copy data from Device to Host asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); +/** + * @brief Copy data from Device to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +/** + * @brief Copies from one 1D array to host 
memory. + * + * @param[out] dstHost Destination pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount, hipStream_t stream); +/** + * @brief Copies from host memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t ByteCount, hipStream_t stream); +/** + * @brief Returns a global pointer from a module. + * @ingroup Module + * + * Returns in *dptr and *bytes the pointer and size of the global of name name located in module + * hmod. If no variable of that name exists, it returns hipErrorNotFound. Both parameters dptr and + * bytes are optional. If one of them is NULL, it is ignored and hipSuccess is returned. + * + * @param[out] dptr Returns global device pointer + * @param[out] bytes Returns global size in bytes + * @param[in] hmod Module to retrieve global from + * @param[in] name Name of global to retrieve + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotFound, #hipErrorInvalidContext + * + */ +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +/** + * @brief Gets device pointer associated with symbol on the device. 
+ * + * @param[out] devPtr device pointer associated with the symbol + * @param[in] symbol pointer to the symbol on the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); + + +/** + * @brief Gets the size of the given symbol on the device. + * + * @param[in] symbol pointer to the device symbol + * @param[out] size pointer to the size + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); + +/** + * @brief Gets the pointer of requested HIP driver function. + * + * @param[in] symbol The Symbol name of the driver function to request. + * @param[out] pfn Output pointer to the requested driver function. + * @param[in] hipVersion The HIP version for the requested driver function symbol. + * HIP version is defined as 100*version_major + version_minor. For example, in HIP 6.1, the + * hipversion is 601, for the symbol function "hipGetDeviceProperties", the specified hipVersion 601 + * is greater or equal to the version 600, the symbol function will be handled properly as backend + * compatible function. + * + * @param[in] flags Currently only default flag is supported. + * @param[out] symbolStatus Optional enumeration for returned status of searching for symbol driver + * function based on the input hipVersion. + * + * Returns hipSuccess if the returned pfn is addressed to the pointer of found driver function. + * + * @returns #hipSuccess, #hipErrorInvalidValue. + */ +hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, + hipDriverProcAddressQueryResult* symbolStatus); + +/** + * @brief Copies data to the given symbol on the device. + * Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on + * the host side. The symbol can be in __constant or device space. + * Note that the symbol name needs to be encased in the HIP_SYMBOL macro.
+ * This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize. + * For detailed usage, see the + * memcpyToSymbol + * example in the HIP Porting Guide. + * + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from start of symbol + * @param[in] kind type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); + +/** + * @brief Copies data to the given symbol on the device asynchronously. + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from start of symbol + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +/** + * @brief Copies data from the given symbol on the device. + * + * @param[out] dst Returns pointer to destination memory address + * @param[in] symbol Pointer to the symbol address on the device + * @param[in] sizeBytes Size in bytes to copy + * @param[in] offset Offset in bytes from the start of symbol + * @param[in] kind Type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); + +/** + * @brief Copies data from the given symbol on the device asynchronously.
+ * + * @param[out] dst Returns pointer to destination memory address + * @param[in] symbol pointer to the symbol address on the device + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from the start of symbol + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data from src to dst asynchronously. + * + * The copy is always performed by the device associated with the specified stream. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst + * pointers (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and + * src/dest as the peerDevice argument. If enabling device peer access is not done, the memory copy + * will still work, but will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. 
+ * + * @param[out] dst Data being copied to + * @param[in] src Data being copied from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Type of memory transfer + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, + * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dst with the constant + * byte value value. + * + * @param[out] dst Data being filled + * @param[in] value Value to be set + * @param[in] sizeBytes Data size in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes); +/** + * @brief Fills the first count bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count); +/** + * @brief Fills the first count bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. 
If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dest with the constant integer + * value for specified number of times. 
+ * + * @param[out] dest Data being filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant + * byte value value. + * + * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] sizeBytes Size in bytes to set + * @param[in] stream Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dev with the constant integer + * value for specified number of times. + * + * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dst with the constant value. 
+ * + * @param[out] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Constant value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); +/** + * @brief Fills asynchronously the memory area pointed to by dst with the constant value. + * + * @param[in] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, + hipStream_t stream __dparm(0)); +/** + * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. 
+ * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 8-bit values synchronously to the specified char value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 8-bit values asynchronously to the specified char value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8Async(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 16-bit values synchronously to the specified short + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row. 
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 16-bit values asynchronously to the specified short + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16Async(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, + size_t width, size_t height, hipStream_t stream __dparm(0)); +/** + * @brief Fills 2D memory range of 'width' 32-bit values synchronously to the specified int value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 32-bit values asynchronously to the specified int + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row. 
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Query memory info. + * + * On ROCM, this function gets the actual free memory left on the current device, so supports + * the cases while running multi-workload (such as multiple processes, multiple threads, and + * multiple GPUs). + * + * @warning On Windows, the free memory only accounts for memory allocated by this process and may + * be optimistic. + * + * @param[out] free Returns free memory on the current device in bytes + * @param[out] total Returns total allocatable memory on the current device in bytes + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + **/ +hipError_t hipMemGetInfo(size_t* free, size_t* total); + +/** + * @brief Get allocated memory size via memory pointer. + * + * This function gets the allocated shared virtual memory size from memory pointer. + * + * @param[in] ptr Pointer to allocated memory + * @param[out] size Returns the allocated memory size in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + **/ +hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); +/** + * @brief Allocate an array on the device. 
+ * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] width Requested array allocation width + * @param[in] height Requested array allocation height + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMallocArray(hipArray_t* array, const hipChannelFormatDesc* desc, size_t width, + size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); +/** + * @brief Create an array memory pointer on the device. + * + * @param[out] pHandle Pointer to the array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayCreate(hipArray_t* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); +/** + * @brief Destroy an array memory pointer on the device. + * + * @param[in] array Pointer to the array memory + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayDestroy(hipArray_t array); +/** + * @brief Create a 3D array memory pointer on the device. + * + * @param[out] array Pointer to the 3D array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArray3DCreate(hipArray_t* array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray); +/** + * @brief Create a 3D memory pointer on the device. 
+ * + * @param[out] pitchedDevPtr Pointer to the 3D memory + * @param[in] extent Requested extent + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocPitch, hipMemGetInfo, hipFree + */ +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); +/** + * @brief Frees an array on the device. + * + * @param[in] array Pointer to array to free + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree + */ +hipError_t hipFreeArray(hipArray_t array); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +/** + * @brief Gets info about the specified array + * + * @param[out] desc - Returned array type + * @param[out] extent - Returned array shape. 
2D arrays will have depth of zero + * @param[out] flags - Returned array flags + * @param[in] array - The HIP array to get info for + * + * @returns #hipSuccess, #hipErrorInvalidValue #hipErrorInvalidHandle + * + * @see hipArrayGetDescriptor, hipArray3DGetDescriptor + */ +hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, + hipArray_t array); +/** + * @brief Gets a 1D or 2D array descriptor + * + * @param[out] pArrayDescriptor - Returned array descriptor + * @param[in] array - Array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue #hipErrorInvalidHandle + * + * @see hipArray3DCreate, hipArray3DGetDescriptor, hipArrayCreate, hipArrayDestroy, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Gets a 3D array descriptor + * + * @param[out] pArrayDescriptor - Returned 3D array descriptor + * @param[in] array - 3D array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue #hipErrorInvalidHandle, #hipErrorContextIsDestroyed + * + * @see hipArray3DCreate, hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D, 
hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Copies data between host and device. + * + * hipMemcpy2D supports memory matrix copy from the pointed area src to the pointed area dst. + * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * Device to Device copies don't need to wait for host synchronization. + * The copy is executed on the default null stream. The src and dst must not overlap. + * dpitch and spitch are the widths in bytes in memory matrix, width cannot exceed dpitch or + * spitch. + * + * For hipMemcpy2D, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. For optimal peer-to-peer copies, the copy device + * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess) with copy + * agent as the current device and src/dst as the peerDevice argument. If this is not done, the + * hipMemcpy2D will still work, but will perform the copy using a staging buffer on the host. + * + * @warning Calling hipMemcpy2D with dst and src pointers that do not match the hipMemcpyKind + * results in undefined behavior. 
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width size in bytes of matrix transfer (columns) + * @param[in] height Height size in bytes of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device asynchronously. + * + * hipMemcpy2DAsync supports memory matrix copy from the pointed area src to the pointed area dst. 
+ * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * dpitch and spitch are the widths in bytes of the memory matrices corresponding to dst and src. + * width cannot exceed dpitch or spitch. + * + * The copy is always performed by the device associated with the specified stream. + * The API is asynchronous with respect to the host, so the call may return before the copy is + * complete. The copy can optionally be executed in a specific stream by passing a non-zero stream + * argument; for HostToDevice or DeviceToHost copies, the copy can overlap with operations + * in other streams. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst pointers + * (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and src/dst as the + * peerDevice argument. If enabling device peer access is not done, the API will still work, but + * will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. 
+ * + * @param[in] dst Pointer to destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Pointer to source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffsetDst Destination starting X offset + * @param[in] hOffsetDst Destination starting Y offset + * @param[in] src Source memory address + * @param[in] wOffsetSrc Source starting X offset + * @param[in] hOffsetSrc Source starting Y offset (columns in bytes) + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, + size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination 
memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] count size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] count Size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyAtoH(void* dst, hipArray_t srcArray, size_t srcOffset, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] dstArray Destination memory address + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyHtoA(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] p 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); +/** + * @brief Copies data between host and device asynchronously. 
+ * + * @param[in] p 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] pCopy 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); +/** + * @brief Get information on memory allocations. 
+ * + * @param [out] pbase - Base pointer address + * @param [out] psize - Size of allocation + * @param [in] dptr - Device Pointer + * + * @returns #hipSuccess, #hipErrorNotFound + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); + +/** + * @brief Perform Batch of 1D copies + * + * @param [in] dsts - Array of destination pointers + * @param [in] srcs - Array of source pointers. + * @param [in] sizes - Array of sizes for memcpy operations + * @param [in] count - Size of dsts, srcs and sizes arrays + * @param [in] attrs - Array of memcpy attributes (not supported) + * @param [in] attrsIdxs - Array of indices to map attrs to copies (not supported) + * @param [in] numAttrs - Size of attrs and attrsIdxs arrays (not supported) + * @param [out] failIdx - Pointer to a location to return failure index inside the batch + * @param [in] stream - stream used to enqueue operations in. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, size_t* attrsIdxs, size_t numAttrs, + size_t* failIdx, hipStream_t stream __dparm(0)); + +/** + * @brief Perform Batch of 3D copies + * + * @param [in] numOps - Total number of memcpy operations. + * @param [in] opList - Array of size numOps containing the actual memcpy operations. + * @param [out] failIdx - Pointer to a location to return the index of the copy where a failure + * - was encountered. + * @param [in] flags - Flags for future use, must be zero now. + * @param [in] stream - The stream to enqueue the operations in.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp* opList, size_t* failIdx, + unsigned long long flags, hipStream_t stream __dparm(0)); + +/** + * @brief Performs 3D memory copies between devices + * This API is asynchronous with respect to host + * + * @param [in] p - Parameters for memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p); + +/** + * @brief Performs 3D memory copies between devices asynchronously + * + * @param [in] p - Parameters for memory copy + * @param [in] stream - Stream to enqueue operation in. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream __dparm(0)); +// doxygen end Memory +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup PeerToPeer PeerToPeer Device Memory Access + * @{ + * @ingroup API + * This section describes the PeerToPeer device memory access functions of HIP runtime API. + */ +/** + * @brief Determines if a device can access a peer device's memory. + * + * @param [out] canAccessPeer - Returns the peer access capability (0 or 1) + * @param [in] deviceId - The device accessing the peer device memory. + * @param [in] peerDeviceId - Peer device where memory is physically located + * + * The value of @p canAccessPeer, + * + * Returns "1" if the specified @p deviceId is capable of directly accessing memory physically + * located on @p peerDeviceId, + * + * Returns "0" if the specified @p deviceId is not capable of directly accessing memory physically + * located on @p peerDeviceId.
+ * + * Returns "0" if @p deviceId == @p peerDeviceId, both are valid devices, + * however, a device is not a peer of itself. + * + * Returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enables direct access to memory allocations on a peer device. + * + * When this API is successful, all memory allocations on peer device will be mapped into the + * address space of the current device. In addition, any future memory allocation on the + * peer device will remain accessible from the current device, until the access is disabled using + * hipDeviceDisablePeerAccess or device is reset using hipDeviceReset. + * + * @param [in] peerDeviceId - Peer device to enable direct access to from the current device + * @param [in] flags - Reserved for future use, must be zero + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); +/** + * @brief Disables direct access to memory allocations on a peer device. + * + * If direct access to memory allocations on peer device has not been enabled yet from the current + * device, it returns #hipErrorPeerAccessNotEnabled. + * + * @param [in] peerDeviceId Peer device to disable direct access to + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + */ +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); + +/** + * @brief Copies memory between two peer accessible devices. 
+ * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDeviceId - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); +/** + * @brief Copies memory between two peer accessible devices asynchronously. + * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDevice - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * @param [in] stream - Stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream __dparm(0)); + +// doxygen end PeerToPeer +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Context Context Management [Deprecated] + * @{ + * This section describes the context management functions of HIP runtime API. + * + * @warning + * + * On the AMD platform, context management APIs are deprecated as there are better alternate + * interfaces, such as using hipSetDevice and stream APIs to achieve the required functionality. + * + * On the NVIDIA platform, CUDA supports the driver API that defines "Context" and "Devices" as + * separate entities. Each context contains a single device, which can theoretically have multiple + * contexts. HIP initially added limited support for these APIs to facilitate easy porting from + * existing driver codes. 
+ * + * These APIs are only for equivalent driver APIs on the NVIDIA platform. + * + */ + +/** + * @brief Create a context and set it as current/default context + * + * @param [out] ctx Context to create + * @param [in] flags Context creation flags + * @param [in] device device handle + * + * @returns #hipSuccess + * + * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, + * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); +/** + * @brief Destroy a HIP context [Deprecated] + * + * @param [in] ctx Context to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDestroy(hipCtx_t ctx); +/** + * @brief Pop the current/default context and return the popped context [Deprecated] + * + * @param [out] ctx The current context to pop + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPopCurrent(hipCtx_t* ctx); +/** + * @brief Push the context to be set as current/ default context [Deprecated] + * + * @param [in] ctx The current context to push + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPushCurrent(hipCtx_t ctx); +/** + * @brief Set the passed context as current/default [Deprecated] + * + * @param [in] ctx The context to set as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCurrent(hipCtx_t ctx); +/** + * @brief Get the handle of the current/ default context [Deprecated] + * + * @param [out] ctx The context to get as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCurrent(hipCtx_t* ctx); +/** + * @brief Get the handle of the device associated with current/default context [Deprecated] + * + * @param [out] device The device from the current context + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetDevice(hipDevice_t* device); +/** + * @brief Returns the approximate HIP api version. + * + * @param [in] ctx Context to check [Deprecated] + * @param [out] apiVersion API version to get + * + * @returns #hipSuccess + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. + * This function always set *apiVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the api revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, unsigned int* apiVersion); +/** + * @brief Get Cache configuration for a specific function [Deprecated] + * + * @param [out] cacheConfig Cache configuration + * + * @returns #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. 
+ * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Set L1/Shared cache partition [Deprecated] + * + * @param [in] cacheConfig Cache configuration to set + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Set Shared memory bank configuration [Deprecated] + * + * @param [in] config Shared memory configuration to set + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief Get Shared memory bank configuration [Deprecated] + * + * @param [out] pConfig Pointer of shared memory configuration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Blocks until the default context has completed all preceding requested tasks [Deprecated] + * + * @return #hipSuccess + * + * @warning This function waits for all streams on the default context to complete execution, and + * then returns. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSynchronize(void); +/** + * @brief Return flags used for creating default context [Deprecated] + * + * @param [out] flags Pointer of flags + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetFlags(unsigned int* flags); +/** + * @brief Enables direct access to memory allocations in a peer context [Deprecated] + * + * Memory which is already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerCtx Peer context + * @param [in] flags Flags, must be set to 0 + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * #hipErrorPeerAccessAlreadyEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); +/** + * @brief Disable direct access from current context's virtual address space to memory allocations + * physically located on a peer context. Disables direct access to memory allocations in a peer + * context and unregisters any registered allocations [Deprecated] + * + * Returns #hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device.
+ * + * @param [in] peerCtx Peer context to be disabled + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); + +/** + * @brief Get the state of the primary context [Deprecated] + * + * @param [in] dev Device to get primary context flags for + * @param [out] flags Pointer to store flags + * @param [out] active Pointer to store context state; 0 = inactive, 1 = active + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); +/** + * @brief Release the primary context on the GPU. + * + * @param [in] dev Device which primary context is released [Deprecated] + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on + * HIP/HIP-CLANG path. + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); +/** + * @brief Retain the primary context on the GPU [Deprecated] + * + * @param [out] pctx Returned context handle of the new context + * @param [in] dev Device whose primary context is retained + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); +/** + * @brief Resets the primary context on the GPU [Deprecated] + * + * @param [in] dev Device which primary context is reset + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); +/** + * @brief Set flags for the primary context [Deprecated] + * + * @param [in] dev Device for which the primary context flags are set + * @param [in] flags New flags for the device + * + * @returns #hipSuccess, #hipErrorContextAlreadyInUse + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); +// doxygen end Context Management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * + * @defgroup Module Module Management + * @{ + * @ingroup API + * This section describes the module management functions of HIP runtime API. + * + */ +/** + * @brief Loads fatbin object + * + * @param [in] fatbin fatbin to be loaded as a module + * @param [out] module Module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoadFatBinary(hipModule_t* module, const void* fatbin); +/** + * @brief Loads code object from file into a module in the current context. + * + * @param [in] fname Filename of code object to load + + * @param [out] module Module + * + * @warning File/memory resources allocated in this function are released only in hipModuleUnload. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoad(hipModule_t* module, const char* fname); +/** + * @brief Frees the module + * + * @param [in] module Module to free + * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle + * + * The module is freed, and the code objects associated with it are destroyed.
+ */ +hipError_t hipModuleUnload(hipModule_t module); +/** + * @brief Function with kname will be extracted if present in module + * + * @param [in] module Module to get function from + * @param [in] kname Pointer to the name of function + * @param [out] function Pointer to function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound, + */ +hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); + +/** + * @brief Returns the number of functions within a module. + * + * @param [in] mod Module to get function count from + * @param [out] count function count from module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound, + */ +hipError_t hipModuleGetFunctionCount(unsigned int* count, hipModule_t mod); + +/** + * @brief Load hip Library from inmemory object + * + * @param [out] library Output Library + * @param [in] code In memory object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * @param [in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue, + */ +hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption* jitOptions, + void** jitOptionsValues, unsigned int numJitOptions, + hipLibraryOption* libraryOptions, void** libraryOptionValues, + unsigned int numLibraryOptions); + +/** + * @brief Load hip Library from file + * + * @param [out] library Output Library + * @param [in] fileName file which contains code object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * @param 
[in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName, + hipJitOption* jitOptions, void** jitOptionsValues, + unsigned int numJitOptions, hipLibraryOption* libraryOptions, + void** libraryOptionValues, unsigned int numLibraryOptions); + +/** + * @brief Unload HIP Library + * + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryUnload(hipLibrary_t library); + +/** + * @brief Get Kernel object from library + * + * @param [out] pKernel Output kernel object + * @param [in] library Input hip library + * @param [in] name kernel name to be searched for + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name); + +/** + * @brief Get Kernel count in library + * + * @param [out] count Count of kernels in library + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipLibraryGetKernelCount(unsigned int *count, hipLibrary_t library); + +/** + * @brief Retrieve kernel handles within a library + * + * @param [out] kernels Buffer for kernel handles + * @param [in] numKernels Maximum number of kernel handles to return to buffer + * @param [in] library Library handle to query from + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipLibraryEnumerateKernels(hipKernel_t* kernels, unsigned int numKernels, + hipLibrary_t library); + +/** + * @brief Returns a Library Handle + * + * @param [out] library Returned Library handle + * @param [in] kernel Kernel to retrieve library Handle + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetLibrary(hipLibrary_t* library, hipKernel_t kernel); + +/** + * @brief Returns
a Kernel Name + * + * @param [out] name Returned Kernel Name + * @param [in] kernel Kernel handle to retrieve name + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetName(const char** name, hipKernel_t kernel); + +/** + * @brief Returns the offset and size of a kernel parameter + * + * @param [in] kernel Kernel handle to retrieve parameter info + * @param [in] paramIndex Index of the parameter + * @param [out] paramOffset returns the offset of the parameter + * @param [out] paramSize Optionally returns the size of the parameter + * + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetParamInfo(hipKernel_t kernel, size_t paramIndex, size_t* paramOffset, + size_t* paramSize); + +/** + * @brief Find out attributes for a given function. + * @ingroup Execution + * @param [out] attr Attributes of function + * @param [in] func Pointer to the function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func); +/** + * @brief Find out a specific attribute for a given function. + * @ingroup Execution + * @param [out] value Pointer to the value + * @param [in] attrib Attributes of the given function + * @param [in] hfunc Function to get attributes from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc); +/** + * @brief Gets pointer to device entry function that matches entry function symbolPtr.
+ * + * @param [out] functionPtr Device entry function + * @param [in] symbolPtr Pointer to device entry function to search for + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGetFuncBySymbol(hipFunction_t* functionPtr, const void* symbolPtr); +/** + * @brief Gets function pointer of a requested HIP API + * + * @param [in] symbol The API base name + * @param [out] funcPtr Pointer to the requested function + * @param [in] flags Flags for the search + * @param [out] driverStatus Optional returned status of the search + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, + hipDriverEntryPointQueryResult* driverStatus); +/** + * @brief returns the handle of the texture reference with the name from the module. + * + * @param [in] hmod Module + * @param [in] name Pointer of name of texture reference + * @param [out] texRef Pointer of texture reference + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound, #hipErrorInvalidValue + */ +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); +/** + * @brief builds module from code object data which resides in host memory. + * + * The "image" is a pointer to the location of code object data. This data can be either + * a single code object or a fat binary (fatbin), which serves as the entry point for loading and + * launching device-specific kernel executions. + * + * By default, the following command generates a fatbin: + * + * "amdclang++ -O3 -c --offload-device-only --offload-arch= -o " + * + * For more details, refer to: + * + * Kernel Compilation in the HIP kernel language C++ support, or + * HIP runtime compilation (HIP RTC). 
+ * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory + */ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image); +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. Options are not used. hipModuleLoadData is called. + * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * @param [in] numOptions Number of options + * @param [in] options Options for JIT + * @param [in] optionValues Option values for JIT + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory + */ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionValues); +/** + * @brief Adds bitcode data to be linked with options. + * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] data Input data which is null terminated + * @param [in] size Size of the input data + * @param [in] name Optional name for this input + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * If adding the data fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddData(hipLinkState_t state, hipJitInputType type, void* data, size_t size, + const char* name, unsigned int numOptions, hipJitOption* options, + void** optionValues); + +/** + * @brief Adds a file with bitcode to be linked with options.
+ * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] path Path to the input file where bitcode is present + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If adding the file fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddFile(hipLinkState_t state, hipJitInputType type, const char* path, + unsigned int numOptions, hipJitOption* options, void** optionValues); + +/** + * @brief Completes the linking of the given program. + * @param [in] state hip link state + * @param [out] hipBinOut Upon success, points to the output binary + * @param [out] sizeOut Size of the binary is stored (optional) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If completing the link fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ + +hipError_t hipLinkComplete(hipLinkState_t state, void** hipBinOut, size_t* sizeOut); + +/** + * @brief Creates a linker instance with options. + * @param [in] numOptions Number of options + * @param [in] options Array of options + * @param [in] optionValues Array of option values cast to void* + * @param [out] stateOut hip link state created upon success + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidConfiguration + * + * @see hipSuccess + */ +hipError_t hipLinkCreate(unsigned int numOptions, hipJitOption* options, void** optionValues, + hipLinkState_t* stateOut); +/** + * @brief Deletes the linker instance.
+ * @param [in] state link state instance + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipSuccess + */ +hipError_t hipLinkDestroy(hipLinkState_t state); + +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams or extra + * @ingroup Execution + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items + * @param [in] blockDimY Y block dimension specified in work-items + * @param [in] blockDimZ Z block dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * @param [in] kernelParams Kernel parameters to launch + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * All passed arguments must be naturally aligned according to their type. The memory address of + * each argument should be a multiple of its size in bytes. Please refer to + * hip_porting_driver_api.md for sample usage. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. So gridDim.x * blockDim.x, gridDim.y * blockDim.y + * and gridDim.z * blockDim.z are always less than 2^32.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra); +/** \addtogroup ModuleCooperativeG Cooperative groups kernel launch of Module management. + * \ingroup Module + * @{ */ +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items. + * @param [in] blockDimY Y block dimension specified in work-items. + * @param [in] blockDimZ Z block dimension specified in work-items. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, + * in which case the default stream is used with associated synchronization rules. + * @param [in] kernelParams A list of kernel arguments. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$. 
+ * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorLaunchFailure, #hipErrorLaunchOutOfResources, + * #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure, + * #hipErrorLaunchOutOfResources, #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, + * #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags); +/** + * @brief Launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute. + * + * @param [in] f - Kernel to launch. + * @param [in] gridDim - Grid dimensions specified as multiple of blockDim. + * @param [in] blockDimX - Block dimensions specified in work-items + * @param [in] kernelParams - Pointer of arguments passed to the kernel. 
If the kernel has multiple + * parameters, 'kernelParams' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); + +// Doxygen end group ModuleCooperativeG +/** @} */ + +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); +/** + * @brief Launches a HIP kernel using a generic function pointer and the specified configuration. + * @ingroup Execution + * + * This function is equivalent to hipLaunchKernelEx but accepts the kernel as a generic function + * pointer. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] fPtr Pointer to the device kernel function. + * @param [in] args Array of pointers to the kernel arguments. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args); +/** + * @brief Launches a HIP kernel using the driver API with the specified configuration. + * @ingroup Execution + * + * This function dispatches the device kernel represented by a HIP function object. + * It passes both the kernel parameters and any extra configuration arguments to the kernel launch. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] f HIP function object representing the device kernel to be launched. + * @param [in] params Array of pointers to the kernel parameters. + * @param [in] extra Array of pointers for additional launch parameters or extra configuration + * data. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, + void** extra); +/** + * @brief Returns a handle for the address range requested. + * + * This function returns a handle to a device pointer created using either hipMalloc set of APIs + * or through hipMemAddressReserve (as long as the ptr is mapped). 
+ * + * @param [out] handle Ptr to the handle where the fd or other types will be returned. + * @param [in] dptr Device ptr for which we get the handle. + * @param [in] size Size of the address range. + * @param [in] handleType Type of the handle requested for the address range. + * @param [in] flags Any flags set regarding the handle requested. + * + * @returns #hipSuccess if the handle is returned successfully, otherwise an appropriate error code. + */ +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags); +// doxygen end Module +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Occupancy Occupancy + * @{ + * This section describes the occupancy functions of HIP runtime API. + * + */ +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function (hipFunction) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f, + int blockSize, + size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f, + int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, + unsigned int flags __dparm(hipOccupancyDefault)); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +/** + * @brief Returns dynamic shared memory available per block when launching numBlocks blocks on SM. + * + * @ingroup Occupancy + * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory + * to allow numBlocks blocks per SM. + * + * @param [out] dynamicSmemSize Returned maximum dynamic shared memory. + * @param [in] f Kernel function for which occupancy is calculated.
+ * @param [in] numBlocks Number of blocks to fit on SM + * @param [in] blockSize Size of the block + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue, + * #hipErrorUnknown + */ +hipError_t hipOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* f, + int numBlocks, int blockSize); +// doxygen end Occupancy +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Profiler Profiler Control [Deprecated] + * @{ + * This section describes the profiler control functions of HIP runtime API. + * + * @warning The cudaProfilerInitialize API format for "configFile" is not supported. + * + */ +// TODO - expand descriptions: +/** + * @brief Start recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStart API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStart(); +/** + * @brief Stop recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStop API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStop(); +// doxygen end profiler +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Clang Launch API to support the triple-chevron syntax + * @{ + * This section describes the API to support the triple-chevron syntax.
+ */ +/** + * @brief Configure a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Set a kernel argument. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + * @param [in] arg Pointer to the argument in host memory. + * @param [in] size Size of the argument. + * @param [in] offset Offset of the argument on the argument stack. + * + */ +hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); +/** + * @brief Launch a kernel. + * + * @param [in] func Kernel to launch. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchByPtr(const void* func); +/** + * @brief Push configuration of a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules.
+ * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Pop configuration of a kernel launch. + * + * @param [out] gridDim grid dimension specified as multiple of blockDim. + * @param [out] blockDim block dimensions specified in work-items + * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream); +/** + * @brief C compliant kernel launch API + * + * @param [in] function_address - Kernel stub function pointer. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations.
+ * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes __dparm(0), + hipStream_t stream __dparm(0)); + +/** + * @brief Enqueues a host function call in a stream. + * + * @param [in] stream - The stream to enqueue work in. + * @param [in] fn - The function to call once enqueued preceding operations are complete. + * @param [in] userData - User-specified data to be passed to the function. + * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue, + * #hipErrorNotSupported + * + * The host function to call in this API will be executed after the preceding operations in + * the stream are complete. The function is a blocking operation that blocks operations in the + * stream that follow it, until the function returns. + * Event synchronization and internal callback functions make sure enqueued operations will + * execute in order, in the stream. + * + * The host function must not make any HIP API calls. The host function is non-reentrant. It must + * not perform synchronization with any operation that may depend on other processing execution + * but is not enqueued to run earlier in the stream. + * + * Host functions that are enqueued respectively in different non-blocking streams can run + * concurrently. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + */ +hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData); + +/** + * Copies memory for 2D arrays.
+ * + * @param [in] pCopy - Parameters for the memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy); +// TODO: Move this to hip_ext.h +/** + * @brief Launches kernel from the pointer address, with arguments and shared memory on stream. + * + * @param [in] function_address - Pointer to the Kernel to launch. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block. + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent - If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent - If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags - The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue.
+ * + */ +hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); +// doxygen end Clang launch +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Texture Texture Management + * @{ + * This section describes the texture management functions of HIP runtime API. + */ + +/** + * @brief Creates a texture object. + * + * @param [out] pTexObject pointer to the texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @note 3D linear filter isn't supported on GFX90A boards, on which the API @p + * hipCreateTextureObject will return hipErrorNotSupported. + * + */ +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const struct hipResourceViewDesc* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] textureObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +/** + * @brief Gets the channel descriptor of an array. + * + * @param [out] desc pointer to channel format descriptor + * @param [in] array memory array on the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); + +/** + * @brief Gets resource descriptor for the texture object.
+ * + * @param [out] pResDesc pointer to resource descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets resource view descriptor for the texture object. + * + * @param [out] pResViewDesc pointer to resource view descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceViewDesc(struct hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets texture descriptor for the texture object. + * + * @param [out] pTexDesc pointer to texture descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); + +/** + * @brief Creates a texture object. + * + * @param [out] pTexObject pointer to texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] texObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectDestroy(hipTextureObject_t texObject); + +/** + * @brief Gets resource descriptor of a texture object. 
+ * + * @param [out] pResDesc pointer to resource descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject); + +/** + * @brief Gets resource view descriptor of a texture object. + * + * @param [out] pResViewDesc pointer to resource view descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); + +/** + * @brief Gets texture descriptor of a texture object. + * + * @param [out] pTexDesc pointer to texture descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject); + +/** + * @brief Allocate a mipmapped array on the device. + * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t* mipmappedArray, + const struct hipChannelFormatDesc* desc, struct hipExtent extent, + unsigned int numLevels, unsigned int flags __dparm(0)); + +/** + * @brief Frees a mipmapped array on the device. 
+ * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); + +/** + * @brief Gets a mipmap level of a HIP mipmapped array. + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array + * @param[in] level - Mipmap level + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipGetMipmappedArrayLevel(hipArray_t* levelArray, + hipMipmappedArray_const_t mipmappedArray, unsigned int level); + +/** + * @brief Create a mipmapped array. + * + * @param [out] pHandle pointer to mipmapped array + * @param [in] pMipmappedArrayDesc mipmapped array descriptor + * @param [in] numMipmapLevels number of mipmap levels + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels); + +/** + * @brief Destroy a mipmapped array. + * + * @param [in] hMipmappedArray mipmapped array to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray); + +/** + * @brief Get a mipmapped array on a mipmapped level.
+ * + * @param [out] pLevelArray Pointer to the returned array for the requested mipmap level + * @param [in] hMipMappedArray Mipmapped array to get the level from + * @param [in] level Mipmap level + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, hipMipmappedArray_t hMipMappedArray, + unsigned int level); + +/** + * + * @addtogroup TextureD Texture Management [Deprecated] + * @{ + * @ingroup Texture + * This section describes the deprecated texture management functions of HIP runtime API. + */ + +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @param [in] tex pointer to the texture reference to bind + * @param [in] mipmappedArray memory mipmapped array on the device + * @param [in] desc pointer to the channel format + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +/** + * @brief Gets the texture reference related with the symbol [Deprecated] + * + * @param [out] texref texture reference + * @param [in] symbol pointer to the symbol related with the texture for the reference + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); + +/** + * @brief Gets the border color used by a texture reference [Deprecated] + * + * @param [out] pBorderColor Returned Type and Value of RGBA color. + * @param [in] texRef Texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetBorderColor(float* pBorderColor, const textureReference* texRef); + +/** + * @brief Gets the array bound to a texture reference [Deprecated] + + * + * @param [out] pArray Returned array. + * @param [in] texRef texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetArray(hipArray_t* pArray, const textureReference* texRef); + +/** + * @brief Sets address mode for a texture reference [Deprecated] + * + * @param [in] texRef texture reference. + * @param [in] dim Dimension of the texture. + * @param [in] am Value of the texture address mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddressMode(textureReference* texRef, int dim, + enum hipTextureAddressMode am); +/** + * @brief Binds an array as a texture reference [Deprecated] + * + * @param [in] tex Pointer to texture reference. + * @param [in] array Array to bind. + * @param [in] flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags); +/** + * @brief Set filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference. + * @param [in] fm Value of texture filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Set flags for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference.
+ * @param [in] Flags Value of flags. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFlags(textureReference* texRef, unsigned int Flags); +/** + * @brief Set format for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference. + * @param [in] fmt Value of format. + * @param [in] NumPackedComponents Number of components per array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFormat(textureReference* texRef, hipArray_Format fmt, + int NumPackedComponents); +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @param [out] offset Returned offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] desc Pointer of channel format descriptor. + * @param [in] size Size of memory in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size __dparm(UINT_MAX)); +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @param [out] offset Returned offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] desc Pointer of channel format descriptor. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture2D(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch); +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @param [in] tex Pointer of texture reference. + * @param [in] array Array to bind. + * @param [in] desc Pointer of channel format descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToArray(const textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc); +/** + * @brief Get the offset of the alignment in a texture [Deprecated] + * + * @param [in] offset Offset in bytes. + * @param [in] texref Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); +/** + * @brief Unbinds a texture [Deprecated] + * + * @param [in] tex Texture to unbind. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipUnbindTexture(const textureReference* tex); +/** + * @brief Gets the address for a texture reference [Deprecated] + * + * @param [out] dev_ptr Pointer of device address. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, const textureReference* texRef); +/** + * @brief Gets the address mode for a texture reference [Deprecated] + * + * @param [out] pam Pointer of address mode. + * @param [in] texRef Pointer of texture reference. + * @param [in] dim Dimension. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddressMode(enum hipTextureAddressMode* pam, const textureReference* texRef, + int dim); +/** + * @brief Gets filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of filter mode. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFilterMode(enum hipTextureFilterMode* pfm, const textureReference* texRef); +/** + * @brief Gets flags for a texture reference [Deprecated] + * + * @param [out] pFlags Pointer of flags. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFlags(unsigned int* pFlags, const textureReference* texRef); +/** + * @brief Gets texture format for a texture reference [Deprecated] + * + * @param [out] pFormat Pointer of the format. + * @param [out] pNumChannels Pointer of number of channels. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, int* pNumChannels, + const textureReference* texRef); +/** + * @brief Gets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [out] pmaxAnsio Pointer of the maximum anisotropy. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, const textureReference* texRef); +/** + * @brief Gets the mipmap filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of the mipmap filter mode. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapFilterMode(enum hipTextureFilterMode* pfm, + const textureReference* texRef); +/** + * @brief Gets the mipmap level bias for a texture reference [Deprecated] + * + * @param [out] pbias Pointer of the mipmap level bias. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelBias(float* pbias, const textureReference* texRef); +/** + * @brief Gets the minimum and maximum mipmap level clamps for a texture reference [Deprecated] + * + * @param [out] pminMipmapLevelClamp Pointer of the minimum mipmap level clamp. + * @param [out] pmaxMipmapLevelClamp Pointer of the maximum mipmap level clamp. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, + const textureReference* texRef); +/** + * @brief Gets the mipmapped array bound to a texture reference [Deprecated] + * + * @param [out] pArray Pointer of the mipmapped array. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipMappedArray(hipMipmappedArray_t* pArray, const textureReference* texRef); +/** + * @brief Sets a bound address for a texture reference [Deprecated] + * + * @param [out] ByteOffset Pointer of the offset in bytes. + * @param [in] texRef Pointer of texture reference. + * @param [in] dptr Pointer of device address to bind. + * @param [in] bytes Size in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress(size_t* ByteOffset, textureReference* texRef, hipDeviceptr_t dptr, + size_t bytes); +/** + * @brief Binds an address as a 2D texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] desc Pointer of array descriptor. + * @param [in] dptr Pointer of device address to bind. + * @param [in] Pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress2D(textureReference* texRef, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, size_t Pitch); +/** + * @brief Sets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] maxAniso Value of the maximum anisotropy.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef, unsigned int maxAniso); +/** + * @brief Sets border color for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] pBorderColor Pointer of border color. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetBorderColor(textureReference* texRef, float* pBorderColor); +/** + * @brief Sets mipmap filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] fm Value of filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Sets mipmap level bias for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] bias Value of mipmap bias. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef, float bias); +/** + * @brief Sets mipmap level clamp for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] minMipMapLevelClamp Value of minimum mipmap level clamp. + * @param [in] maxMipMapLevelClamp Value of maximum mipmap level clamp. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef, float minMipMapLevelClamp, + float maxMipMapLevelClamp); +/** + * @brief Binds mipmapped array to a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference to bind. + * @param [in] mipmappedArray Pointer of mipmapped array to bind. + * @param [in] Flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, + struct hipMipmappedArray* mipmappedArray, unsigned int Flags); + +// doxygen end deprecated texture management +/** + * @} + */ + +// doxygen end Texture management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Runtime Runtime Compilation + * @{ + * This section describes the runtime compilation functions of HIP runtime API. + * + */ +// This group is for HIPrtc + +// doxygen end Runtime +/** + * @} + */ + +/** + * + * @defgroup Callback Callback Activity APIs + * @{ + * This section describes the callback/Activity of HIP runtime API. + */ +/** + * @brief Returns HIP API name by ID. + * + * @param [in] id ID of HIP API + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +const char* hipApiName(uint32_t id); +/** + * @brief Returns kernel name reference by function name. + * + * @param [in] f Name of function + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +const char* hipKernelNameRef(const hipFunction_t f); +/** + * @brief Retrives kernel for a given host pointer, unless stated otherwise. + * + * @param [in] hostFunction Pointer of host function. 
+ * @param [in] stream Stream the kernel is executed on. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream); +/** + * @brief Returns device ID on the stream. + * + * @param [in] stream Stream of device executed on. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +int hipGetStreamDeviceId(hipStream_t stream); + +// doxygen end Callback +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Graph Graph Management + * @{ + * This section describes the graph management types & functions of HIP runtime API. + */ + +/** + * @brief Begins graph capture on a stream. + * + * @param [in] stream - Stream to initiate capture. + * @param [in] mode - Controls the interaction of this capture sequence with other API calls that + * are not safe. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode); + +/** +* @brief Begins graph capture on a stream to an existing graph. +* +* @param [in] stream - Stream to initiate capture. +* @param [in] graph - Graph to capture into. +* @param [in] dependencies - Dependencies of the first node captured in the stream. Can be NULL if +* numDependencies is 0. +* @param [in] dependencyData - Optional array of data associated with each dependency. +* @param [in] numDependencies - Number of dependencies. +* @param [in] mode - Controls the interaction of this capture sequence with other API calls that +are not safe. +* +* @returns #hipSuccess, #hipErrorInvalidValue +* +* @warning param "const hipGraphEdgeData* dependencyData" is currently not supported and has to be +passed as nullptr. 
This API is marked as beta, meaning, while this is feature complete, it is still +open to changes and may have outstanding issues. +* +*/ +hipError_t hipStreamBeginCaptureToGraph(hipStream_t stream, hipGraph_t graph, + const hipGraphNode_t* dependencies, + const hipGraphEdgeData* dependencyData, + size_t numDependencies, hipStreamCaptureMode mode); + +/** + * @brief Ends capture on a stream, returning the captured graph. + * + * @param [in] stream - Stream to end capture. + * @param [out] pGraph - Captured graph. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph); + +/** + * @brief Get capture status of a stream. + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] pCaptureStatus - Returns current capture status. + * @param [out] pId - Unique capture ID. + * + * @returns #hipSuccess, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); + +/** + * @brief Get stream's capture state + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] captureStatus_out - Returns current capture status. + * @param [out] id_out - Unique capture ID. + * @param [out] graph_out - Returns the graph being captured into. + * @param [out] dependencies_out - Pointer to an array of nodes representing the graphs + * dependencies. + * @param [out] numDependencies_out - Returns size of the array returned in dependencies_out. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo_v2(hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out __dparm(0), + hipGraph_t* graph_out __dparm(0), + const hipGraphNode_t** dependencies_out __dparm(0), + size_t* numDependencies_out __dparm(0)); + +/** + * @brief Get stream's capture state + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] pCaptureStatus - Returns current capture status. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus); + +/** + * @brief Update the set of dependencies in a capturing stream + * + * @param [in] stream Stream that is being captured. + * @param [in] dependencies Pointer to an array of nodes to add/replace. + * @param [in] numDependencies Size of the dependencies array. + * @param [in] flags Flag to update dependency set. Should be one of the values + * in enum #hipStreamUpdateCaptureDependenciesFlags. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorIllegalState + * + */ +hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, hipGraphNode_t* dependencies, + size_t numDependencies, + unsigned int flags __dparm(0)); + +/** + * @brief Swaps the stream capture mode of a thread. + * + * @param [in] mode - Pointer to mode value to swap with the current mode. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode); + +/** + * @brief Creates a graph + * + * @param [out] pGraph - pointer to graph to create. + * @param [in] flags - flags for graph creation, must be 0. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags); + +/** + * @brief Destroys a graph + * + * @param [in] graph - instance of graph to destroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroy(hipGraph_t graph); + +/** + * @brief Adds dependency edges to a graph. + * + * @param [in] graph - Instance of the graph to add dependencies to. + * @param [in] from - Pointer to the graph nodes with dependencies to add from. + * @param [in] to - Pointer to the graph nodes to add dependencies to. + * @param [in] numDependencies - Number of dependencies to add. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Removes dependency edges from a graph. + * + * @param [in] graph - Instance of the graph to remove dependencies from. + * @param [in] from - Array of nodes that provide the dependencies. + * @param [in] to - Array of dependent nodes. + * @param [in] numDependencies - Number of dependencies to remove. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Returns a graph's dependency edges. + * + * @param [in] graph - Instance of the graph to get the edges from. + * @param [out] from - Pointer to the graph nodes to return edge endpoints. + * @param [out] to - Pointer to the graph nodes to return edge endpoints. + * @param [out] numEdges - Returns number of edges. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * from and to may both be NULL, in which case this function only returns the number of edges in + * numEdges. Otherwise, numEdges entries will be filled in. 
If numEdges is higher than the actual + * number of edges, the remaining entries in from and to will be set to NULL, and the number of + * edges actually returned will be written to numEdges. + * + */ +hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, + size_t* numEdges); + +/** + * @brief Returns a graph's nodes. + * + * @param [in] graph - Instance of graph to get the nodes from. + * @param [out] nodes - Pointer to return the graph nodes. + * @param [out] numNodes - Returns the number of graph nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * nodes may be NULL, in which case this function will return the number of nodes in numNodes. + * Otherwise, numNodes entries will be filled in. If numNodes is higher than the actual number of + * nodes, the remaining entries in nodes will be set to NULL, and the number of nodes actually + * obtained will be returned in numNodes. + * + */ +hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes); + +/** + * @brief Returns a graph's root nodes. + * + * @param [in] graph - Instance of the graph to get the nodes from. + * @param [out] pRootNodes - Pointer to return the graph's root nodes. + * @param [out] pNumRootNodes - Returns the number of graph's root nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pRootNodes may be NULL, in which case this function will return the number of root nodes in + * pNumRootNodes. Otherwise, pNumRootNodes entries will be filled in. If pNumRootNodes is higher + * than the actual number of root nodes, the remaining entries in pRootNodes will be set to NULL, + * and the number of nodes actually obtained will be returned in pNumRootNodes. + * + */ +hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes); + +/** + * @brief Returns a node's dependencies. + * + * @param [in] node - Graph node to get the dependencies from. 
+ * @param [out] pDependencies - Pointer to return the dependencies. + * @param [out] pNumDependencies - Returns the number of graph node dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependencies may be NULL, in which case this function will return the number of dependencies in + * pNumDependencies. Otherwise, pNumDependencies entries will be filled in. If pNumDependencies is + * higher than the actual number of dependencies, the remaining entries in pDependencies will be set + * to NULL, and the number of nodes actually obtained will be returned in pNumDependencies. + * + */ +hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, + size_t* pNumDependencies); + +/** + * @brief Returns a node's dependent nodes. + * + * @param [in] node - Graph node to get the dependent nodes from. + * @param [out] pDependentNodes - Pointer to return the graph dependent nodes. + * @param [out] pNumDependentNodes - Returns the number of graph node dependent nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependentNodes may be NULL, in which case this function will return the number of dependent + * nodes in pNumDependentNodes. Otherwise, pNumDependentNodes entries will be filled in. If + * pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in + * pDependentNodes will be set to NULL, and the number of nodes actually obtained will be returned + * in pNumDependentNodes. + * + */ +hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, + size_t* pNumDependentNodes); + +/** + * @brief Returns a node's type. + * + * @param [in] node - Node to get type of. + * @param [out] pType - Returns the node's type. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType); + +/** + * @brief Remove a node from the graph. 
+ * + * @param [in] node - graph node to remove + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroyNode(hipGraphNode_t node); + +/** + * @brief Clones a graph. + * + * @param [out] pGraphClone - Returns newly created cloned graph. + * @param [in] originalGraph - original graph to clone from. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph); + +/** + * @brief Finds a cloned version of a node. + * + * @param [out] pNode - Returns the cloned node. + * @param [in] originalNode - original node handle. + * @param [in] clonedGraph - Cloned graph to query. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph); + +/** + * @brief Creates an executable graph from a graph + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [out] pErrorNode - Pointer to error node. In case an error occurred during + * graph instantiation, it could modify the corresponding node. + * @param [out] pLogBuffer - Pointer to log buffer. + * @param [in] bufferSize - Size of the log buffer. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + */ +hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [in] flags - Flags to control instantiation. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API does not support any flags and behaves like hipGraphInstantiate.
+ */ +hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [in] instantiateParams - Graph instantiation parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphInstantiateParams* instantiateParams); +/** + * @brief Launches an executable graph in the specified stream. + * + * @param [in] graphExec - Instance of executable graph to launch. + * @param [in] stream - Instance of stream in which to launch executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Uploads an executable graph to a stream. + * + * @param [in] graphExec - Instance of executable graph to be uploaded. + * @param [in] stream - Instance of stream to which the executable graph is uploaded. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Creates a kernel execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to kernel graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies on the kernel execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] nodeParams - Pointer to the node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue.
+ * + */ +hipError_t hipGraphAddNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraphNodeParams* nodeParams); + +/** + * @brief Returns the flags of an executable graph. + * + * @param [in] graphExec - Executable graph to get the flags from. + * @param [out] flags - Flags used to instantiate this executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue. + * + */ +hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); + +/** + * @brief Updates parameters of a graph's node. + * + * @param [in] node - Instance of the node to set parameters for. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams* nodeParams); + +/** + * @brief Updates parameters of an executable graph's node. + * + * @param [in] graphExec - Instance of the executable graph. + * @param [in] node - Instance of the node to set parameters to. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, + hipGraphNodeParams* nodeParams); + +/** + * @brief Destroys an executable graph. + * + * @param [in] graphExec - Instance of executable graph to destroy. + * + * @returns #hipSuccess. + * + */ +hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec); + +// Check whether an executable graph can be updated with a graph and perform the update if possible. +/** + * @brief Check whether an executable graph can be updated with a graph and perform the update if + * possible. + * + * @param [in] hGraphExec - Instance of executable graph to update.
+ * @param [in] hGraph - graph that contains the updated parameters. + * @param [in] hErrorNode_out - node which caused the permissibility check to forbid the update. + * @param [in] updateResult_out - Return code whether the graph update was performed. + * @returns #hipSuccess, #hipErrorGraphExecUpdateFailure + * + */ +hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out); + +/** + * @brief Creates a kernel execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies of the kernel execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] pNodeParams - Pointer to the parameters of the kernel execution node. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Gets a kernel node's parameters. + * + * @param [in] node - Instance of the node to get parameters from. + * @param [out] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets a kernel node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a kernel node in the given graphExec.
+ * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the kernel node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Creates a memcpy node from HIP_MEMCPY3D parameters and adds it to a graph. + * + * @param [out] phGraphNode - Pointer to graph node that is created. + * @param [in] hGraph - Instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] copyParams - const pointer to the parameters for the memory copy. + * @param [in] ctx - Context related to the current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemcpyNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); +/** + * @brief Creates a memcpy node from hipMemcpy3DParms parameters and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] pCopyParams - const pointer to the parameters for the memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams); +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] node - Instance of the node to get parameters from.
+ * @param [out] pNodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a kernel node's attribute. + * + * @param [in] hNode - Instance of the node to set the attribute of. + * @param [in] attr - The attribute type to be set. + * @param [in] value - const pointer to the attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value); +/** + * @brief Gets a kernel node's attribute. + * + * @param [in] hNode - Instance of the node to get the attribute from. + * @param [in] attr - The attribute type to be queried. + * @param [out] value - Pointer to return the attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value); +/** + * @brief Sets the parameters of a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the memcpy node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams); + +/** + * @brief Creates a 1D memcpy node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created.
+ * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to perform a 1-dimensional copy. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy (hipMemcpyKind). + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, + size_t count, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional + * copy. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy (hipMemcpyKind).
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* src, size_t count, + hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy from a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy from a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy from a symbol on the + * device.
+ * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy to a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy to a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy.
+ * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy to a symbol on the + * device. + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memset execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pMemsetParams - const pointer to the parameters for the memory set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams); + +/** + * @brief Gets a memset node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); + +/** + * @brief Sets a memset node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipMemsetParams* pNodeParams); + +/** + * @brief Creates a host execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the host execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Returns a host node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams); + +/** + * @brief Sets a host node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a host node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Creates a child graph node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the child graph node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] childGraph - Graph to clone into this node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraph_t childGraph); + +/** + * @brief Gets a handle to the embedded graph of a child graph node. + * + * @param [in] node - Instance of the node to get child graph of. + * @param [out] pGraph - Pointer to get the graph.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph); + +/** + * @brief Updates node parameters in the child graph node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Node from the graph which was used to instantiate graphExec. + * @param [in] childGraph - Child graph with updated parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipGraph_t childGraph); + +/** + * @brief Creates an empty node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies); + + +/** + * @brief Creates an event record node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event of the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + +/** + * @brief Returns the event associated with an event record node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event record node's event. + * + * @param [in] node - Instance of the node to set event to. + * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event record node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] hNode - Node from the graph which was used to instantiate graphExec. + * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates an event wait node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event for the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + + +/** + * @brief Returns the event associated with an event wait node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event wait node's event. + * + * @param [in] node - Instance of the node to set event of.
+ * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event wait node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] hNode - Node from the graph which was used to instantiate graphExec. + * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates a memory allocation node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph the node is added to + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in, out] pNodeParams - Node parameters for memory allocation, returns a pointer to the + * allocated memory.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Returns the parameters of a memory allocation node. + * + * @param [in] node - Memory allocation node to query + * @param [out] pNodeParams - Parameters for the specified memory allocation node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Creates a memory free node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph the node is added to + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dev_ptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dev_ptr); + +/** + * @brief Returns the parameters of a memory free node. + * + * @param [in] node - Memory free node to query + * @param [out] dev_ptr - Device pointer of the specified memory free node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr); + +/** + * @brief Gets the memory attribute for graphs. + * + * @param [in] device - Device to get attributes from + * @param [in] attr - Attribute type to be queried + * @param [out] value - Value of the queried attribute + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Sets the memory attribute for graphs.
+ * + * @param [in] device - Device to set attribute of. + * @param [in] attr - Attribute type to be set. + * @param [in] value - Value of the attribute. + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Free unused memory reserved for graphs on a specific device and return it back to the OS. + * + * @param [in] device - Device for which memory should be trimmed + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGraphMemTrim(int device); + +/** + * @brief Create an instance of userObject to manage lifetime of a resource. + * + * @param [out] object_out - pointer to instance of userobj. + * @param [in] ptr - pointer to pass to destroy function. + * @param [in] destroy - destroy callback to remove resource. + * @param [in] initialRefcount - initial reference count of the resource. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags); + +/** + * @brief Release number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be released. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be retained. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain user object for graphs. + * + * @param [in] graph - pointer to graph to retain the user object for.
+ * @param [in] object - pointer to instace of userobj. + * @param [in] count - reference to resource to be retained. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1), unsigned int flags __dparm(0)); + +/** + * @brief Release user object from graphs. + * + * @param [in] graph - pointer to graph to release the user object from. + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be released. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1)); + +/** + * @brief Write a DOT file describing graph structure. + * + * @param [in] graph - graph object for which DOT file has to be generated. + * @param [in] path - path to write the DOT file. + * @param [in] flags - Flags from hipGraphDebugDotFlags to get additional node information. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOperatingSystem + * + */ +hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, unsigned int flags); + +/** + * @brief Copies attributes from source node to destination node. + * + * Copies attributes from source node to destination node. + * Both nodes must have the same context. + * + * @param [in] hSrc - Source node. + * @param [out] hDst - Destination node. + * For list of attributes see ::hipKernelNodeAttrID. + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + */ +hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, hipGraphNode_t hDst); + +/** + * @brief Enables or disables the specified node in the given graphExec. + * + * Sets hNode to be either enabled or disabled. Disabled nodes are functionally equivalent + * to empty nodes until they are reenabled.
Existing node parameters are not affected by + * disabling/enabling the node. + * + * The node is identified by the corresponding hNode in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] isEnabled - Node is enabled if != 0, otherwise the node is disabled. + * + * @returns #hipSuccess, #hipErrorInvalidValue, + * + */ +hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int isEnabled); +/** + * @brief Query whether a node in the given graphExec is enabled. + * + * Sets isEnabled to 1 if hNode is enabled, or 0 if it is disabled. + * + * The node is identified by the corresponding node in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to query the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] isEnabled - Location to return the enabled status of the node. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled); + +/** + * @brief Creates an external semaphore wait node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the external semaphore wait node.
+ * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams -pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresWaitNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Creates an external semaphore signal node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the dependencies of the node to create. + * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresSignalNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore signal node. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreWaitNodeParams* nodeParams); +/** + * @brief Returns external semaphore signal node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreSignalNodeParams* params_out); +/** + * @brief Returns external semaphore wait node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreWaitNodeParams* params_out); +/** + * @brief Updates node parameters in the external semaphore signal node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresSignalNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresWaitNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to get parameters from. + * @param [out] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* nodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to set parameters for. + * @param [in] nodeParams - const pointer to the parameters to set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] phGraphNode - pointer to graph node to create. + * @param [in] hGraph - instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies on the memset execution node. + * @param [in] numDependencies - number of the dependencies. + * @param [in] memsetParams - const pointer to the parameters for the memory set. + * @param [in] ctx - context related to current device. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemsetNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +/** + * @brief Creates a memory free node and adds it to a graph + * + * @param [out] phGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] hGraph - Instance of the graph to which the node is added + * @param [in] dependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemFreeNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + hipDeviceptr_t dptr); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] copyParams - const pointer to the memcpy node params. + * @param [in] ctx - context related to current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] memsetParams - pointer to the parameters. + * @param [in] ctx - context related to current device. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +// doxygen end graph API +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Virtual Virtual Memory Management + * @{ + * This section describes the virtual memory management functions of HIP runtime API. + * + * @note Please note, the virtual memory management functions of HIP runtime + * API are implemented on Linux, under development on Windows. The + * following Virtual Memory Management APIs are not (yet) + * supported in HIP: + * - hipMemMapArrayAsync + */ + +/** + * @brief Frees an address range reservation made via hipMemAddressReserve + * + * @param [in] devPtr - starting address of the range. + * @param [in] size - size of the range. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressFree(void* devPtr, size_t size); + +/** + * @brief Reserves an address range + * + * @param [out] ptr - starting address of the reserved range. + * @param [in] size - size of the reservation. + * @param [in] alignment - alignment of the address. + * @param [in] addr - requested starting address of the range. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr, + unsigned long long flags); + +/** + * @brief Creates a memory allocation described by the properties and size + * + * @param [out] handle - value of the returned handle. + * @param [in] size - size of the allocation. + * @param [in] prop - properties of the allocation. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, + const hipMemAllocationProp* prop, unsigned long long flags); + +/** + * @brief Exports an allocation to a requested shareable handle type. + * + * @param [out] shareableHandle - value of the returned handle. + * @param [in] handle - handle to share. + * @param [in] handleType - type of the shareable handle. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemExportToShareableHandle(void* shareableHandle, + hipMemGenericAllocationHandle_t handle, + hipMemAllocationHandleType handleType, + unsigned long long flags); + +/** + * @brief Get the access flags set for the given location and ptr. + * + * @param [out] flags - flags for this location. + * @param [in] location - target location. + * @param [in] ptr - address to check the access flags. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr); + +/** + * @brief Calculates either the minimal or recommended granularity. + * + * @param [out] granularity - returned granularity. + * @param [in] prop - location properties. + * @param [in] option - determines which granularity to return. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop, + hipMemAllocationGranularity_flags option); + +/** + * @brief Retrieve the property structure of the given handle. + * + * @param [out] prop - properties of the given handle. + * @param [in] handle - handle to perform the query on. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, + hipMemGenericAllocationHandle_t handle); + +/** + * @brief Imports an allocation from a requested shareable handle type. + * + * @param [out] handle - returned value. + * @param [in] osHandle - shareable handle representing the memory allocation. + * @param [in] shHandleType - handle type. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle, + hipMemAllocationHandleType shHandleType); + +/** + * @brief Maps an allocation handle to a reserved virtual address range. + * + * @param [in] ptr - address where the memory will be mapped. + * @param [in] size - size of the mapping. + * @param [in] offset - offset into the memory, currently must be zero. + * @param [in] handle - memory allocation to be mapped. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle, + unsigned long long flags); + +/** + * @brief Maps or unmaps subregions of sparse HIP arrays and sparse HIP mipmapped arrays. + * + * @param [in] mapInfoList - list of hipArrayMapInfo. + * @param [in] count - number of hipArrayMapInfo in mapInfoList. + * @param [in] stream - stream identifier for the stream to use for map or unmap operations. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is under development. Currently it is not supported on AMD + * GPUs and returns #hipErrorNotSupported. + */ +hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count, + hipStream_t stream); + +/** + * @brief Release a memory handle representing a memory allocation which was previously allocated + * through hipMemCreate. 
+ * + * @param [in] handle - handle of the memory allocation. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle); + +/** + * @brief Returns the allocation handle of the backing memory allocation given the address. + * + * @param [out] handle - handle representing addr. + * @param [in] addr - address to look up. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); + +/** + * @brief Set the access flags for each location specified in desc for the given virtual address + * range. + * + * @param [in] ptr - starting address of the virtual address range. + * @param [in] size - size of the range. + * @param [in] desc - array of hipMemAccessDesc. + * @param [in] count - number of hipMemAccessDesc in desc. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count); + +/** + * @brief Unmap memory allocation of a given address range. + * + * @param [in] ptr - starting address of the range to unmap. + * @param [in] size - size of the virtual address range. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemUnmap(void* ptr, size_t size); + +// doxygen end virtual memory management API +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup GraphicsInterop Graphics Interoperability + * @{ + * This section describes graphics interoperability functions of HIP runtime API. + */ + +/** + * @brief Maps a graphics resource for access. + * + * @param [in] count - Number of resources to map. + * @param [in] resources - Pointer of resources to map. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle + * + */ +hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Get an array through which to access a subresource of a mapped graphics resource. + * + * @param [out] array - Pointer of array through which a subresource of resource may be accessed. + * @param [in] resource - Mapped resource to access. + * @param [in] arrayIndex - Array index for the subresource to access. + * @param [in] mipLevel - Mipmap level for the subresource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note In this API, the value of arrayIndex higher than zero is currently not supported. + * + */ +hipError_t hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel); +/** + * @brief Gets device accessible address of a graphics resource. 
+ * + * @param [out] devPtr - Pointer of device through which graphic resource may be accessed. + * @param [out] size - Size of the buffer accessible from devPtr. + * @param [in] resource - Mapped resource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, + hipGraphicsResource_t resource); +/** + * @brief Unmaps graphics resources. + * + * @param [in] count - Number of resources to unmap. + * @param [in] resources - Pointer of resources to unmap. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + */ +hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Unregisters a graphics resource. + * + * @param [in] resource - Graphics resource to unregister. + * + * @returns #hipSuccess + * + */ +hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource); +// doxygen end GraphicsInterop +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Surface Surface Object + * @{ + * + * This section describes surface object functions of HIP runtime API. + * + * @note APIs in this section are under development. + * + */ + +/** + * @brief Create a surface object. + * + * @param [out] pSurfObject Pointer to surface object to be created. + * @param [in] pResDesc Pointer to surface object descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); +/** + * @brief Destroy a surface object. + * + * @param [in] surfaceObject Surface object to be destroyed. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +// end of surface +/** + * @} + */ + +/** + * @brief Enable HIP runtime logging. + * + * This function enables the HIP runtime logging mechanism, allowing diagnostic + * and trace information to be captured during HIP API execution. + * + * @returns #hipSuccess + * + * @see hipExtDisableLogging, hipExtSetLoggingParams + */ +hipError_t hipExtEnableLogging(); +/** + * @brief Disable HIP runtime logging. + * + * This function disables the HIP runtime logging mechanism, stopping the capture + * of diagnostic and trace information during HIP API execution. + * + * @returns #hipSuccess + * + * @see hipExtEnableLogging, hipExtSetLoggingParams + */ +hipError_t hipExtDisableLogging(); +/** + * @brief Set HIP runtime logging parameters. + * + * This function configures the logging behavior of the HIP runtime, including + * the verbosity level, buffer size, and which components to log. + * + * @param [in] log_level The logging verbosity level. Higher values produce more detailed output. + * @param [in] log_size Reserved for future use. Currently not implemented. + * @param [in] log_mask A bitmask specifying which HIP runtime components to log. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipExtEnableLogging, hipExtDisableLogging + */ +hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask); + +#ifdef __cplusplus +} /* extern "c" */ +#endif +#ifdef __cplusplus +#if defined(__clang__) && defined(__HIP__) +template static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f), + dynSharedMemPerBlk, blockSizeLimit); +} +template static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, + unsigned int flags = 0) { + (void)flags; + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f), + dynSharedMemPerBlk, blockSizeLimit); +} +#endif // defined(__clang__) && defined(__HIP__) + +/** + * @brief Gets the address of a symbol. + * @ingroup Memory + * @param [out] devPtr - Returns device pointer associated with symbol. + * @param [in] symbol - Device symbol. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template hipError_t hipGetSymbolAddress(void** devPtr, const T& symbol) { + return ::hipGetSymbolAddress(devPtr, (const void*)&symbol); +} +/** + * @ingroup Memory + * @brief Gets the size of a symbol. + * + * @param [out] size - Returns the size of a symbol. + * @param [in] symbol - Device symbol address. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template hipError_t hipGetSymbolSize(size_t* size, const T& symbol) { + return ::hipGetSymbolSize(size, (const void*)&symbol); +} + +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device. 
+ * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbol + */ +template +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device asynchronously on the stream. + * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbolAsync + */ +template +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} +/** + * @brief Copies data from the given symbol on the device. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbol + */ +template +hipError_t hipMemcpyFromSymbol(void* dst, const T& symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} +/** + * @brief Copies data from the given symbol on the device asynchronously on the stream. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbolAsync + */ +template +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} + +/** + * @brief Returns occupancy for a kernel function. + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks. 
+ * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, T f, int blockSize, + size_t dynSharedMemPerBlk) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, reinterpret_cast(f), + blockSize, dynSharedMemPerBlk); +} +/** + * @brief Returns occupancy for a device function with the specified flags. + * + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks. + * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * @param [in] flags - Flag to handle the behavior for the occupancy calculator. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk, flags); +} +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). 
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * @param [in] flags reserved + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0, unsigned int flags = 0) { + if (min_grid_size == nullptr || block_size == nullptr || + reinterpret_cast(func) == nullptr) { + return hipErrorInvalidValue; + } + + int dev; + hipError_t status; + if ((status = hipGetDevice(&dev)) != hipSuccess) { + return status; + } + + int max_threads_per_cu; + if ((status = hipDeviceGetAttribute(&max_threads_per_cu, + hipDeviceAttributeMaxThreadsPerMultiProcessor, dev)) != + hipSuccess) { + return status; + } + + int warp_size; + if ((status = hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, dev)) != hipSuccess) { + return status; + } + + int max_cu_count; + if ((status = hipDeviceGetAttribute(&max_cu_count, hipDeviceAttributeMultiprocessorCount, dev)) != + hipSuccess) { + return status; + } + + struct hipFuncAttributes attr; + if ((status = hipFuncGetAttributes(&attr, reinterpret_cast(func))) != hipSuccess) { + return status; + } + + // Initial limits for the execution + const int func_max_threads_per_block = attr.maxThreadsPerBlock; + if (block_size_limit == 0) { + block_size_limit = func_max_threads_per_block; + } + + if 
(func_max_threads_per_block < block_size_limit) { + block_size_limit = func_max_threads_per_block; + } + + const int block_size_limit_aligned = + ((block_size_limit + (warp_size - 1)) / warp_size) * warp_size; + + // For maximum search + int max_threads = 0; + int max_block_size{}; + int max_num_blocks{}; + for (int block_size_check_aligned = block_size_limit_aligned; block_size_check_aligned > 0; + block_size_check_aligned -= warp_size) { + // Make sure the logic uses the requested limit and not aligned + int block_size_check = + (block_size_limit < block_size_check_aligned) ? block_size_limit : block_size_check_aligned; + + size_t dyn_smem_size = block_size_to_dynamic_smem_size(block_size_check); + int optimal_blocks; + if ((status = hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + &optimal_blocks, func, block_size_check, dyn_smem_size, flags)) != hipSuccess) { + return status; + } + + int total_threads = block_size_check * optimal_blocks; + if (total_threads > max_threads) { + max_block_size = block_size_check; + max_num_blocks = optimal_blocks; + max_threads = total_threads; + } + + // Break if the logic reached possible maximum + if (max_threads_per_cu == max_threads) { + break; + } + } + + // Grid size is the number of blocks per CU * CU count + *min_grid_size = max_num_blocks * max_cu_count; + *block_size = max_block_size; + + return status; +} + +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). 
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMem( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0) { + return hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + min_grid_size, block_size, func, block_size_to_dynamic_smem_size, block_size_limit); +} +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipOccupancyMaxPotentialBlockSize + */ +template inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, + int* blockSize, F kernel, + size_t dynSharedMemPerBlk, + uint32_t blockSizeLimit) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, (hipFunction_t)kernel, + dynSharedMemPerBlk, blockSizeLimit); +} + +/** + * @brief Returns dynamic shared memory available per block when launching numBlocks blocks on SM. 
+ * + * @ingroup Occupancy + * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory / + * to allow numBlocks blocks per SM. + * + * @param [out] dynamicSmemSize Returned maximum dynamic shared memory. + * @param [in] f Kernel function for which occupancy is calculated. + * @param [in] numBlocks Number of blocks to fit on SM + * @param [in] blockSize Size of the block + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue, + * #hipErrorUnknown + */ +template +inline hipError_t hipOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, F f, + int numBlocks, int blockSize) { + return hipOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, reinterpret_cast(f), + numBlocks, blockSize); +} +/** + * @brief Launches a device function + * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * \tparam T The type of the kernel function. + * + * @param [in] f Kernel function to launch. + * @param [in] gridDim Grid dimensions specified as multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items. + * @param [in] kernelParams A list of kernel arguments. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for + * this kernel. The HIP-Clang compiler provides + * support for extern shared declarations. + * @param [in] stream Stream which on the kernel launched. + * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, void** kernelParams, + unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDim, + kernelParams, sharedMemBytes, stream); +} +/** + * @brief Launches kernel function on multiple devices, where thread blocks can + * cooperate and synchronize on execution. 
+ * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * @param [in] launchParamsList List of kernel launch parameters, one per device. + * @param [in] numDevices Size of launchParamsList array. + * @param [in] flags Flag to handle launch behavior. + * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +template +inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] size Size of memory in bytes. + * + * @warning This API is deprecated. 
+ * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] desc Texture channel format. + * @param [in] size Size of memory in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, + const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, size_t width, size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] desc Texture channel format. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. 
+ * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, size_t width, + size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array) { + struct hipChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err; +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToArray(&tex, array, &desc); +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device. + * + * @warning This API is deprecated. 
+ * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray) { + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err; +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} +/** + * @brief Unbinds a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to unbind. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipUnbindTexture(const struct texture& tex) { + return hipUnbindTexture(&tex); +} +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @ingroup StreamO + * @{ + * + * This section describes wrappers for stream Ordered allocation from memory pool functions of + * HIP runtime API. + * + * @note APIs in this section are implemented on Linux, under development on Windows. 
+ * + */ + +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +static inline hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(dev_ptr, size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool on the stream + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template +static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, hipStream_t stream) { + return hipMallocAsync(reinterpret_cast(dev_ptr), size, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. 
+ */ +template static inline hipError_t hipMallocFromPoolAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief Launches a HIP kernel using the specified configuration. + * @ingroup Execution + * + * This function dispatches the provided kernel with the given launch configuration and forwards the + * kernel arguments. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] kernel Pointer to the device kernel function to be launched. + * @param [in] args Variadic list of arguments to be passed to the kernel. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +template +static inline __host__ hipError_t hipLaunchKernelEx(const hipLaunchConfig_t* config, + void (*kernel)(KernelArgs...), + Params&&... args) { + return [&](KernelArgs... convertedArgs) { + void* pArgs[] = {&convertedArgs...}; + return ::hipLaunchKernelExC(config, reinterpret_cast(kernel), pArgs); + }(std::forward(args)...); +} +/** + * @} + */ + + +#endif // __cplusplus + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_runtime_api.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + + +/** + * @brief: C++ wrapper for hipMalloc + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (ie void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. 
+ * + * @see hipMalloc + */ +#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__) +template static inline hipError_t hipMalloc(T** devPtr, size_t size) { + return hipMalloc((void**)devPtr, size); +} +/** + * @brief: C++ wrapper for hipMallocPitch + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (ie void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocPitch + */ +template +static inline hipError_t hipMallocPitch(T** devPtr, size_t* pitch, size_t width, size_t height) { + return hipMallocPitch((void**)devPtr, pitch, width, height); +} +/** + * @brief: C++ wrapper for hipHostMalloc + * @ingroup Memory + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipHostMalloc + */ +template +static inline hipError_t hipHostMalloc(T** ptr, size_t size, + unsigned int flags = hipHostMallocDefault) { + return hipHostMalloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipHostAlloc + * @ingroup Memory + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. 
+ * + * @see hipHostAlloc + */ +template static inline hipError_t hipHostAlloc(T** ptr, size_t size, + unsigned int flags = hipHostAllocDefault) { + return hipHostAlloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipMallocManaged + * + * @ingroup MemoryM + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocManaged + * + */ +template +static inline hipError_t hipMallocManaged(T** devPtr, size_t size, + unsigned int flags = hipMemAttachGlobal) { + return hipMallocManaged((void**)devPtr, size, flags); +} + + +#endif +#endif +// doxygen end HIP API +/** + * @} + */ +#include + +#if USE_PROF_API +#include +#endif diff --git a/external/hip/hip/hip_texture_types.h b/external/hip/hip/hip_texture_types.h new file mode 100644 index 0000000..9cefbe6 --- /dev/null +++ b/external/hip/hip/hip_texture_types.h @@ -0,0 +1,29 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/external/hip/hip/hip_vector_types.h b/external/hip/hip/hip_vector_types.h new file mode 100644 index 0000000..98a0bcd --- /dev/null +++ b/external/hip/hip/hip_vector_types.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! hip_vector_types.h : Defines the HIP vector types. 
+ +#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H + +#include + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#if __cplusplus +#include +#endif +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/hiprtc.h b/external/hip/hip/hiprtc.h new file mode 100644 index 0000000..f4f8b88 --- /dev/null +++ b/external/hip/hip/hiprtc.h @@ -0,0 +1,473 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#pragma once + +#include +#include + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +#ifdef __cplusplus +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if !defined(_WIN32) +#pragma GCC visibility push(default) +#endif + +/** + * + * @addtogroup GlobalDefs + * @{ + * + */ +/** + * hiprtc error code + */ +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, ///< Success + HIPRTC_ERROR_OUT_OF_MEMORY = 1, ///< Out of memory + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, ///< Failed to create program + HIPRTC_ERROR_INVALID_INPUT = 3, ///< Invalid input + HIPRTC_ERROR_INVALID_PROGRAM = 4, ///< Invalid program + HIPRTC_ERROR_INVALID_OPTION = 5, ///< Invalid option + HIPRTC_ERROR_COMPILATION = 6, ///< Compilation error + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, ///< Failed in builtin operation + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, ///< No name expression after compilation + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, ///< No lowered names before compilation + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, ///< Invalid name expression + HIPRTC_ERROR_INTERNAL_ERROR = 11, ///< Internal error + HIPRTC_ERROR_LINKING = 100 ///< Error in linking +} hiprtcResult; +/** + * hiprtc JIT option + */ +#define hiprtcJIT_option hipJitOption +#define HIPRTC_JIT_MAX_REGISTERS \ + hipJitOptionMaxRegisters ///< CUDA Only Maximum registers may be used in a + ///< thread, passed to compiler +#define HIPRTC_JIT_THREADS_PER_BLOCK \ + hipJitOptionThreadsPerBlock ///< CUDA Only Number of thread per block +#define HIPRTC_JIT_WALL_TIME hipJitOptionWallTime ///< CUDA Only Value for total wall clock time +#define HIPRTC_JIT_INFO_LOG_BUFFER \ + hipJitOptionInfoLogBuffer ///< CUDA Only Pointer to the buffer with + ///< logged information +#define HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES \ + 
hipJitOptionInfoLogBufferSizeBytes ///< CUDA Only Size of the buffer + ///< in bytes for logged info +#define HIPRTC_JIT_ERROR_LOG_BUFFER \ + hipJitOptionErrorLogBuffer ///< CUDA Only Pointer to the buffer + ///< with logged error(s) +#define HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES \ + hipJitOptionErrorLogBufferSizeBytes ///< CUDA Only Size of the buffer in + ///< bytes for logged error(s) +#define HIPRTC_JIT_OPTIMIZATION_LEVEL \ + hipJitOptionOptimizationLevel ///< Value of optimization level for + ///< generated codes, acceptable + ///< options -O0, -O1, -O2, -O3 +#define HIPRTC_JIT_TARGET_FROM_HIPCONTEXT \ + hipJitOptionTargetFromContext ///< CUDA Only The target context, + ///< which is the default +#define HIPRTC_JIT_TARGET hipJitOptionTarget ///< CUDA Only JIT target +#define HIPRTC_JIT_FALLBACK_STRATEGY hipJitOptionFallbackStrategy ///< CUDA Only Fallback strategy +#define HIPRTC_JIT_GENERATE_DEBUG_INFO \ + hipJitOptionGenerateDebugInfo ///< CUDA Only Generate debug information +#define HIPRTC_JIT_LOG_VERBOSE hipJitOptionLogVerbose ///< CUDA Only Generate log verbose +#define HIPRTC_JIT_GENERATE_LINE_INFO \ + hipJitOptionGenerateLineInfo ///< CUDA Only Generate line number information +#define HIPRTC_JIT_CACHE_MODE hipJitOptionCacheMode ///< CUDA Only Set cache mode +#define HIPRTC_JIT_NEW_SM3X_OPT hipJitOptionSm3xOpt ///< @deprecated CUDA Only New SM3X option. +#define HIPRTC_JIT_FAST_COMPILE hipJitOptionFastCompile ///< CUDA Only Set fast compile +#define HIPRTC_JIT_GLOBAL_SYMBOL_NAMES \ + hipJitOptionGlobalSymbolNames ///< CUDA Only Array of device symbol names to be + ///< relocated to the host +#define HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS \ + hipJitOptionGlobalSymbolAddresses ///< CUDA Only Array of host addresses to be + ///< relocated to the device +#define HIPRTC_JIT_GLOBAL_SYMBOL_COUNT \ + hipJitOptionGlobalSymbolCount ///< CUDA Only Number of symbol count. 
+#define HIPRTC_JIT_LTO \ + hipJitOptionLto ///< @deprecated CUDA Only Enable link-time + ///< optimization for device code +#define HIPRTC_JIT_FTZ \ + hipJitOptionFtz ///< @deprecated CUDA Only Set + ///< single-precision denormals. +#define HIPRTC_JIT_PREC_DIV \ + hipJitOptionPrecDiv ///< @deprecated CUDA Only Set + ///< single-precision floating-point division + ///< and reciprocals +#define HIPRTC_JIT_PREC_SQRT \ + hipJitOptionPrecSqrt ///< @deprecated CUDA Only Set + ///< single-precision floating-point + ///< square root +#define HIPRTC_JIT_FMA \ + hipJitOptionFma ///< @deprecated CUDA Only Enable + ///< floating-point multiplies and + ///< adds/subtracts operations +#define HIPRTC_JIT_POSITION_INDEPENDENT_CODE \ + hipJitOptionPositionIndependentCode ///< CUDA Only Generates + ///< Position Independent code +#define HIPRTC_JIT_MIN_CTA_PER_SM \ + hipJitOptionMinCTAPerSM ///< CUDA Only Hints to JIT compiler + ///< the minimum number of CTAs from + ///< kernel's grid to be mapped to SM +#define HIPRTC_JIT_MAX_THREADS_PER_BLOCK \ + hipJitOptionMaxThreadsPerBlock ///< CUDA only Maximum number of + ///< threads in a thread block +#define HIPRTC_JIT_OVERRIDE_DIRECT_VALUES \ + hipJitOptionOverrideDirectiveValues ///< CUDA only Override Directive + ///< Values +#define HIPRTC_JIT_NUM_OPTIONS hipJitOptionNumOptions ///< Number of options +#define HIPRTC_JIT_IR_TO_ISA_OPT_EXT \ + hipJitOptionIRtoISAOptExt ///< HIP Only Linker options to be + ///< passed on to compiler +#define HIPRTC_JIT_IR_TO_ISA_OPT_COUNT_EXT \ + hipJitOptionIRtoISAOptCountExt ///< HIP Only Count of linker options + ///< to be passed on to compiler +/** + * hiprtc JIT input type + */ +#define hiprtcJITInputType hipJitInputType +#define HIPRTC_JIT_INPUT_CUBIN hipJitInputCubin ///< Cuda only Input Cubin +#define HIPRTC_JIT_INPUT_PTX hipJitInputPtx ///< Cuda only Input PTX +#define HIPRTC_JIT_INPUT_FATBINARY hipJitInputFatBinary ///< Cuda Only Input FAT Binary +#define HIPRTC_JIT_INPUT_OBJECT \ + 
hipJitInputObject ///< Cuda Only Host Object with embedded device code +#define HIPRTC_JIT_INPUT_LIBRARY \ + hipJitInputLibrary ///< Cuda Only Archive of Host Objects with embedded device code +#define HIPRTC_JIT_INPUT_NVVM \ + hipJitInputNvvm ///< @deprecated CUDA only High Level intermediate code for LTO +#define HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES \ + hipJitNumLegacyInputTypes ///< Count of Legacy Input Types +#define HIPRTC_JIT_INPUT_LLVM_BITCODE \ + hipJitInputLLVMBitcode ///< HIP Only LLVM Bitcode or IR assembly +#define HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE \ + hipJitInputLLVMBundledBitcode ///< HIP Only LLVM Clang Bundled Code +#define HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE \ + hipJitInputLLVMArchivesOfBundledBitcode ///< HIP Only LLVM + ///< Archives of + ///< Bundled Bitcode +#define HIPRTC_JIT_INPUT_SPIRV hipJitInputSpirv ///< HIP Only SPIRV Code Object +#define HIPRTC_JIT_NUM_INPUT_TYPES hipJitNumInputTypes ///< Count of Input Types +/** + * @} + */ + +/** + * hiprtc link state + * + */ +typedef struct ihiprtcLinkState* hiprtcLinkState; +/** + * @ingroup Runtime + * + * @brief Returns text string message to explain the error which occurred + * + * @param [in] result code to convert to string. + * @returns const char pointer to the NULL-terminated error string + * + * @warning In HIP, this function returns the name of the error, + * if the hiprtc result is defined, it will return "Invalid HIPRTC error code" + * + * @see hiprtcResult + */ +const char* hiprtcGetErrorString(hiprtcResult result); + +/** + * @ingroup Runtime + * @brief Sets the parameters as major and minor version. + * + * @param [out] major HIP Runtime Compilation major version. + * @param [out] minor HIP Runtime Compilation minor version. 
+ * + * @returns #HIPRTC_ERROR_INVALID_INPUT, #HIPRTC_SUCCESS + * + */ +hiprtcResult hiprtcVersion(int* major, int* minor); + +/** + * hiprtc program + * + */ +typedef struct _hiprtcProgram* hiprtcProgram; + +/** + * @ingroup Runtime + * @brief Adds the given name expression to the runtime compilation program. + * + * @param [in] prog runtime compilation program instance. + * @param [in] name_expression const char pointer to the name expression. + * @returns #HIPRTC_SUCCESS + * + * If const char pointer is NULL, it will return #HIPRTC_ERROR_INVALID_INPUT. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression); + +/** + * @ingroup Runtime + * @brief Compiles the given runtime compilation program. + * + * @param [in] prog runtime compilation program instance. + * @param [in] numOptions number of compiler options. + * @param [in] options compiler options as const array of strings. + * @returns #HIPRTC_SUCCESS + * + * If the compiler failed to build the runtime compilation program, + * it will return #HIPRTC_ERROR_COMPILATION. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char* const* options); + +/** + * @ingroup Runtime + * @brief Creates an instance of hiprtcProgram with the given input parameters, + * and sets the output hiprtcProgram prog with it. + * + * @param [in, out] prog runtime compilation program instance. + * @param [in] src const char pointer to the program source. + * @param [in] name const char pointer to the program name. + * @param [in] numHeaders number of headers. + * @param [in] headers array of strings pointing to headers. + * @param [in] includeNames array of strings pointing to names included in program source. + * @returns #HIPRTC_SUCCESS + * + * For any invalid input parameter, it will return #HIPRTC_ERROR_INVALID_INPUT + * or #HIPRTC_ERROR_INVALID_PROGRAM. 
+ * + * If failed to create the program, it will return #HIPRTC_ERROR_PROGRAM_CREATION_FAILURE. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char* const* headers, + const char* const* includeNames); + +/** + * @brief Destroys an instance of given hiprtcProgram. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @returns #HIPRTC_SUCCESS + * + * If prog is NULL, it will return #HIPRTC_ERROR_INVALID_INPUT. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog); + +/** + * @brief Gets the lowered (mangled) name from an instance of hiprtcProgram with the given input + * parameters, and sets the output lowered_name with it. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [in] name_expression const char pointer to the name expression. + * @param [in, out] lowered_name const char array to the lowered (mangled) name. + * @returns #HIPRTC_SUCCESS + * + * If any input parameter is an invalid nullptr, it will return #HIPRTC_ERROR_INVALID_INPUT + * + * If name_expression is not found, it will return #HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID + * + * If failed to get lowered_name from the program, it will return #HIPRTC_ERROR_COMPILATION. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** lowered_name); + +/** + * @brief Gets the log generated by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] log memory pointer to the generated log. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log); + +/** + * @brief Gets the size of log generated by the runtime compilation program instance. + * + * @param [in] prog runtime compilation program instance. 
+ * @param [out] logSizeRet size of generated log. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet); + +/** + * @brief Gets the pointer of compilation binary by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] code char pointer to binary. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); + +/** + * @brief Gets the size of compilation binary by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] codeSizeRet the size of binary. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); + +/** + * @brief Gets the pointer of compiled bitcode by the runtime compilation program instance. + * + * @param [in] prog runtime compilation program instance. + * @param [out] bitcode char pointer to bitcode. + * @return HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetBitcode(hiprtcProgram prog, char* bitcode); + +/** + * @brief Gets the size of compiled bitcode by the runtime compilation program instance. + * @ingroup Runtime + * + * @param [in] prog runtime compilation program instance. + * @param [out] bitcode_size the size of bitcode. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetBitcodeSize(hiprtcProgram prog, size_t* bitcode_size); + +/** + * @brief Creates the link instance via hiprtc APIs. 
+ * @ingroup Runtime + * @param [in] num_options Number of options + * @param [in] option_ptr Array of options + * @param [in] option_vals_pptr Array of option values cast to void* + * @param [out] hip_link_state_ptr hiprtc link state created upon success + * + * @returns #HIPRTC_SUCCESS, #HIPRTC_ERROR_INVALID_INPUT, #HIPRTC_ERROR_INVALID_OPTION + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* option_ptr, + void** option_vals_pptr, hiprtcLinkState* hip_link_state_ptr); + +/** + * @brief Adds a file with bit code to be linked with options. + * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [in] input_type Type of the input data or bitcode + * @param [in] file_path Path to the input file where bitcode is present + * @param [in] num_options Size of the options + * @param [in] options_ptr Array of options applied to this input + * @param [in] option_values Array of option values cast to void* + * + * @returns #HIPRTC_SUCCESS + * + * If input values are invalid, it will + * @return #HIPRTC_ERROR_INVALID_INPUT + * + * @see hiprtcResult + */ + +hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + const char* file_path, unsigned int num_options, + hiprtcJIT_option* options_ptr, void** option_values); + +/** + * @brief Adds input data (an in-memory image) to be linked with options.
+ * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [in] input_type Type of the input data or bitcode + * @param [in] image Input data which is null terminated + * @param [in] image_size Size of the input data + * @param [in] name Optional name for this input + * @param [in] num_options Size of the options + * @param [in] options_ptr Array of options applied to this input + * @param [in] option_values Array of option values cast to void* + * + * @returns #HIPRTC_SUCCESS, #HIPRTC_ERROR_INVALID_INPUT + * + * If adding the data fails, it will + * @return #HIPRTC_ERROR_PROGRAM_CREATION_FAILURE + * + * @see hiprtcResult + */ + +hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + void* image, size_t image_size, const char* name, + unsigned int num_options, hiprtcJIT_option* options_ptr, + void** option_values); + +/** + * @brief Completes the linking of the given program. + * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [out] bin_out Upon success, points to the output binary + * @param [out] size_out Size of the binary is stored (optional) + * + * @returns #HIPRTC_SUCCESS + * + * If linking fails, it will + * @return #HIPRTC_ERROR_LINKING + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out); + +/** + * @brief Deletes the link instance via hiprtc APIs.
+ * @ingroup Runtime + * @param [in] hip_link_state link state instance + * + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state); + +#if !defined(_WIN32) +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif diff --git a/external/hip/hip/library_types.h b/external/hip/hip/library_types.h new file mode 100644 index 0000000..c3c8d5d --- /dev/null +++ b/external/hip/hip/library_types.h @@ -0,0 +1,84 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H +#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +typedef enum hipDataType { + HIP_R_32F = 0, + HIP_R_64F = 1, + HIP_R_16F = 2, + HIP_R_8I = 3, + HIP_C_32F = 4, + HIP_C_64F = 5, + HIP_C_16F = 6, + HIP_C_8I = 7, + HIP_R_8U = 8, + HIP_C_8U = 9, + HIP_R_32I = 10, + HIP_C_32I = 11, + HIP_R_32U = 12, + HIP_C_32U = 13, + HIP_R_16BF = 14, + HIP_C_16BF = 15, + HIP_R_4I = 16, + HIP_C_4I = 17, + HIP_R_4U = 18, + HIP_C_4U = 19, + HIP_R_16I = 20, + HIP_C_16I = 21, + HIP_R_16U = 22, + HIP_C_16U = 23, + HIP_R_64I = 24, + HIP_C_64I = 25, + HIP_R_64U = 26, + HIP_C_64U = 27, + HIP_R_8F_E4M3 = 28, + HIP_R_8F_E5M2 = 29, + HIP_R_8F_UE8M0 = 30, + HIP_R_6F_E2M3 = 31, + HIP_R_6F_E3M2 = 32, + HIP_R_4F_E2M1 = 33, + // HIP specific Data Types + HIP_R_8F_E4M3_FNUZ = 1000, + HIP_R_8F_E5M2_FNUZ = 1001, +} hipDataType; + +typedef enum hipLibraryPropertyType { + HIP_LIBRARY_MAJOR_VERSION, + HIP_LIBRARY_MINOR_VERSION, + HIP_LIBRARY_PATCH_LEVEL +} hipLibraryPropertyType; + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "library_types.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/linker_types.h b/external/hip/hip/linker_types.h new file mode 100755 index 0000000..1131910 --- /dev/null +++ b/external/hip/hip/linker_types.h @@ -0,0 +1,138 @@ + +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_LINKER_TYPES_H +#define HIP_INCLUDE_HIP_LINKER_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif + + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup LinkerTypes Jit Linker Data Types + * @{ + * This section describes the Jit Linker data types. 
+ * + */ + +/** + * hipJitOption + */ +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, ///< CUDA Only Maximum registers may be used in a thread, + ///< passed to compiler + hipJitOptionThreadsPerBlock, ///< CUDA Only Number of thread per block + hipJitOptionWallTime, ///< CUDA Only Value for total wall clock time + hipJitOptionInfoLogBuffer, ///< CUDA Only Pointer to the buffer with logged information + hipJitOptionInfoLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged info + hipJitOptionErrorLogBuffer, ///< CUDA Only Pointer to the buffer with logged error(s) + hipJitOptionErrorLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged + ///< error(s) + hipJitOptionOptimizationLevel, ///< Value of optimization level for generated codes, acceptable + ///< options -O0, -O1, -O2, -O3 + hipJitOptionTargetFromContext, ///< CUDA Only The target context, which is the default + hipJitOptionTarget, ///< CUDA Only JIT target + hipJitOptionFallbackStrategy, ///< CUDA Only Fallback strategy + hipJitOptionGenerateDebugInfo, ///< CUDA Only Generate debug information + hipJitOptionLogVerbose, ///< CUDA Only Generate log verbose + hipJitOptionGenerateLineInfo, ///< CUDA Only Generate line number information + hipJitOptionCacheMode, ///< CUDA Only Set cache mode + hipJitOptionSm3xOpt, ///< @deprecated CUDA Only New SM3X option. + hipJitOptionFastCompile, ///< CUDA Only Set fast compile + hipJitOptionGlobalSymbolNames, ///< CUDA Only Array of device symbol names to be relocated to the + ///< host + hipJitOptionGlobalSymbolAddresses, ///< CUDA Only Array of host addresses to be relocated to the + ///< device + hipJitOptionGlobalSymbolCount, ///< CUDA Only Number of symbol count. + hipJitOptionLto, ///< @deprecated CUDA Only Enable link-time optimization for device code + hipJitOptionFtz, ///< @deprecated CUDA Only Set single-precision denormals. 
+ hipJitOptionPrecDiv, ///< @deprecated CUDA Only Set single-precision floating-point division + ///< and reciprocals + hipJitOptionPrecSqrt, ///< @deprecated CUDA Only Set single-precision floating-point square root + hipJitOptionFma, ///< @deprecated CUDA Only Enable floating-point multiplies and + ///< adds/subtracts operations + hipJitOptionPositionIndependentCode, ///< CUDA Only Generates Position Independent code + hipJitOptionMinCTAPerSM, ///< CUDA Only Hints to JIT compiler the minimum number of CTAs from + ///< kernel's grid to be mapped to SM + hipJitOptionMaxThreadsPerBlock, ///< CUDA Only Maximum number of threads in a thread block + hipJitOptionOverrideDirectiveValues, ///< CUDA Only Override Directive values + hipJitOptionNumOptions, ///< Number of options + hipJitOptionIRtoISAOptExt = 10000, ///< HIP Only Linker options to be passed on to compiler + hipJitOptionIRtoISAOptCountExt, ///< HIP Only Count of linker options to be passed on to compiler +} hipJitOption; +/** + * hipJitInputType + */ +typedef enum hipJitInputType { + hipJitInputCubin = 0, ///< CUDA Only Input cubin + hipJitInputPtx, ///< CUDA Only Input PTX + hipJitInputFatBinary, ///< CUDA Only Input FAT Binary + hipJitInputObject, ///< CUDA Only Host Object with embedded device code + hipJitInputLibrary, ///< CUDA Only Archive of Host Objects with embedded + ///< device code + hipJitInputNvvm, ///< @deprecated CUDA Only High Level intermediate + ///< code for LTO + hipJitNumLegacyInputTypes, ///< Count of Legacy Input Types + hipJitInputLLVMBitcode = 100, ///< HIP Only LLVM Bitcode or IR assembly + hipJitInputLLVMBundledBitcode = 101, ///< HIP Only LLVM Clang Bundled Code + hipJitInputLLVMArchivesOfBundledBitcode = 102, ///< HIP Only LLVM Archive of Bundled Bitcode + hipJitInputSpirv = 103, ///< HIP Only SPIRV Code Object + hipJitNumInputTypes = 10 ///< Count of Input Types (6 legacy + 4 HIP Only entries above) +} hipJitInputType; +/** + * hipJitCacheMode + */ +typedef enum hipJitCacheMode { + hipJitCacheOptionNone = 0, +
hipJitCacheOptionCG, + hipJitCacheOptionCA +} hipJitCacheMode; +/** + * hipJitFallback + */ +typedef enum hipJitFallback { + hipJitPreferPTX = 0, + hipJitPreferBinary, +} hipJitFallback; + +typedef enum hipLibraryOption_e { + hipLibraryHostUniversalFunctionAndDataTable = 0, + hipLibraryBinaryIsPreserved = 1 +} hipLibraryOption; + +// doxygen end LinkerTypes +/** + * @} + */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // HIP_INCLUDE_HIP_LINKER_TYPES_H \ No newline at end of file diff --git a/external/hip/hip/math_functions.h b/external/hip/hip/math_functions.h new file mode 100644 index 0000000..896c861 --- /dev/null +++ b/external/hip/hip/math_functions.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H +#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H + +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +// #include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip/surface_types.h b/external/hip/hip/surface_types.h new file mode 100644 index 0000000..d5cc457 --- /dev/null +++ b/external/hip/hip/surface_types.h @@ -0,0 +1,65 @@ +/* +Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @file surface_types.h + * @brief Defines surface types for HIP runtime. + */ + +#ifndef HIP_INCLUDE_HIP_SURFACE_TYPES_H +#define HIP_INCLUDE_HIP_SURFACE_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +#if !defined(__HIPCC_RTC__) +#include +#endif + +/** + * An opaque value that represents a hip surface object + */ +struct __hip_surface; +typedef struct __hip_surface* hipSurfaceObject_t; + +/** + * hip surface reference + */ +struct surfaceReference { + hipSurfaceObject_t surfaceObject; +}; + +/** + * hip surface boundary modes + */ +enum hipSurfaceBoundaryMode { + hipBoundaryModeZero = 0, + hipBoundaryModeTrap = 1, + hipBoundaryModeClamp = 2 +}; + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif /* !HIP_INCLUDE_HIP_SURFACE_TYPES_H */ diff --git a/external/hip/hip/texture_types.h b/external/hip/hip/texture_types.h new file mode 100644 index 0000000..65290cd --- /dev/null +++ b/external/hip/hip/texture_types.h @@ -0,0 +1,193 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wc++98-compat" +#endif + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "texture_types.h" +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#define hipTextureType1D 0x01 +#define hipTextureType2D 0x02 +#define hipTextureType3D 0x03 +#define hipTextureTypeCubemap 0x0C +#define hipTextureType1DLayered 0xF1 +#define hipTextureType2DLayered 0xF2 +#define hipTextureTypeCubemapLayered 0xFC + +/** + * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD + */ +#define HIP_IMAGE_OBJECT_SIZE_DWORD 12 +#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 +#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD +#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) + +/** + * An opaque value that represents a hip texture object + */ +struct __hip_texture; +typedef struct __hip_texture* hipTextureObject_t; + +/** + * hip texture address modes + */ +enum hipTextureAddressMode { + hipAddressModeWrap = 0, + hipAddressModeClamp = 1, + hipAddressModeMirror = 2, + hipAddressModeBorder = 3 
+}; + +/** + * hip texture filter modes + */ +enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 }; + +/** + * hip texture read modes + */ +enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 }; + +/** + * hip texture reference + */ +typedef struct textureReference { + int normalized; + enum hipTextureReadMode readMode; // used only for driver API's + enum hipTextureFilterMode filterMode; + enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + struct hipChannelFormatDesc channelDesc; + int sRGB; // Perform sRGB->linear conversion during texture read + unsigned int maxAnisotropy; // Limit to the anisotropy ratio + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; + + hipTextureObject_t textureObject; + int numChannels; + enum hipArray_Format format; +} textureReference; + +/** + * hip texture descriptor + */ +typedef struct hipTextureDesc { + enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions + enum hipTextureFilterMode filterMode; + enum hipTextureReadMode readMode; + int sRGB; // Perform sRGB->linear conversion during texture read + float borderColor[4]; + int normalizedCoords; + unsigned int maxAnisotropy; + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; +} hipTextureDesc; + +#if __cplusplus + +/******************************************************************************* + * * + * * + * * + *******************************************************************************/ +#if __HIP__ +#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type)) +#else +#define __HIP_TEXTURE_ATTRIB +#endif + +typedef textureReference* hipTexRef; + +template +struct __HIP_TEXTURE_ATTRIB texture : public textureReference { + texture(int norm = 0, enum hipTextureFilterMode fMode = 
hipFilterModePoint, + enum hipTextureAddressMode aMode = hipAddressModeClamp) { + normalized = norm; + readMode = mode; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = hipCreateChannelDesc(); + sRGB = 0; + textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; + } + + texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, + struct hipChannelFormatDesc desc) { + normalized = norm; + readMode = mode; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = desc; + sRGB = 0; + textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; + } +}; + +#endif /* __cplusplus */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/external/hip/hip_bf16.h b/external/hip/hip_bf16.h new file mode 100644 index 0000000..1783946 --- /dev/null +++ b/external/hip/hip_bf16.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_BF16_H +#define HIP_INCLUDE_HIP_HIP_BF16_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // HIP_INCLUDE_HIP_HIP_BF16_H diff --git a/external/hip/hip_bfloat16.h b/external/hip/hip_bfloat16.h new file mode 100644 index 0000000..fbbfd10 --- /dev/null +++ b/external/hip/hip_bfloat16.h @@ -0,0 +1,44 @@ +/** + * MIT License + * + * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/*!\file + * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef + */ + +#ifndef _HIP_BFLOAT16_H_ +#define _HIP_BFLOAT16_H_ + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#warning "hip_bfloat16.h is not supported on nvidia platform" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // _HIP_BFLOAT16_H_ diff --git a/external/hip/hip_common.h b/external/hip/hip_common.h new file mode 100644 index 0000000..4a7dcff --- /dev/null +++ b/external/hip/hip_common.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMMON_H +#define HIP_INCLUDE_HIP_HIP_COMMON_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif +// Common code included at start of every hip file. +// Auto enable __HIP_PLATFORM_AMD__ if compiling on AMD platform +// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly +#if defined(__clang__) && defined(__HIP__) +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ +#endif +#endif // defined(__clang__) && defined(__HIP__) + +// Auto enable __HIP_PLATFORM_NVIDIA__ if compiling with NVIDIA platform +#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__)) +#ifndef __HIP_PLATFORM_NVIDIA__ +#define __HIP_PLATFORM_NVIDIA__ +#endif + +#ifdef __CUDACC__ +#define __HIPCC__ +#endif + +#endif //__NVCC__ + +// Auto enable __HIP_DEVICE_COMPILE__ if compiled in HCC or NVCC device path +#if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \ + (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0) +#define __HIP_DEVICE_COMPILE__ 1 +#endif + +#ifdef __GNUC__ +#define HIP_PUBLIC_API __attribute__((visibility("default"))) +#define HIP_INTERNAL_EXPORTED_API __attribute__((visibility("default"))) +#else +#define HIP_PUBLIC_API +#define HIP_INTERNAL_EXPORTED_API +#endif + +#if __HIP_DEVICE_COMPILE__ == 0 +// 32-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) + +// 64-bit Atomics +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0) 
+#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (0) + +// Warp cross-lane operations +#define __HIP_ARCH_HAS_WARP_VOTE__ (0) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (0) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// Sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// Misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (0) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/external/hip/hip_complex.h b/external/hip/hip_complex.h new file mode 100644 index 0000000..66ff0b3 --- /dev/null +++ b/external/hip/hip_complex.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_HIP_COMPLEX_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip_cooperative_groups.h b/external/hip/hip_cooperative_groups.h new file mode 100644 index 0000000..6734819 --- /dev/null +++ b/external/hip/hip_cooperative_groups.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hip_cooperative_groups.h + * + * @brief Defines new types and device API wrappers for `Cooperative Group` + * feature. 
+ */ + +#ifndef HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H +#define HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H + +#include +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#if __cplusplus && defined(__clang__) && defined(__HIP__) +#include +#endif +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif // HIP_INCLUDE_HIP_HIP_COOPERATIVE_GROUP_H diff --git a/external/hip/hip_deprecated.h b/external/hip/hip_deprecated.h new file mode 100644 index 0000000..91c58e2 --- /dev/null +++ b/external/hip/hip_deprecated.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +// This file will add older hip functions used in the versioning system +// Find the deprecated functions and structs in hip_device.cpp + +// This struct is also kept in hip_device.cpp +typedef struct hipDeviceProp_tR0000 { + char name[256]; ///< Device name. + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + size_t totalConstMem; ///< Size of constant memory region (in bytes). + int major; ///< Major compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability. On HCC, this is an approximation and features may + ///< differ from CUDA CC. See the arch feature flags for portable ways to query + ///< feature caps. + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int l2CacheSize; ///< L2 cache size. + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int computeMode; ///< Compute mode. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP.
+ hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID + int pciBusID; ///< PCI Bus ID. + int pciDeviceID; ///< PCI Device ID. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory + int gcnArch; ///< DEPRECATED: use gcnArchName instead + char gcnArchName[256]; ///< AMD GCN Arch Name. + int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to + ///< pitched memory + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int ECCEnabled; ///< Device has ECC support enabled + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched functions + int
cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + ///< devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device + int managedMemory; ///< Device supports allocating managed memory on this system + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables +} hipDeviceProp_tR0000; + + +#ifdef __cplusplus +extern "C" { +#endif + +hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device); +hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* prop); + +#ifdef __cplusplus +} +#endif diff --git a/external/hip/hip_ext.h b/external/hip/hip_ext.h new file mode 100644 index 0000000..7d475be --- /dev/null +++ b/external/hip/hip_ext.h @@ -0,0 +1,162 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_EXT_H +#define HIP_INCLUDE_HIP_HIP_EXT_H +#include "hip/hip_runtime.h" +#if defined(__cplusplus) +#include +#include +#endif +/** @addtogroup Execution Execution Control + * @{ + */ + +/** + * @brief Launches kernel with parameters and shared memory on stream with arguments passed + * to kernel params or extra arguments. + * + * @param [in] f Kernel to launch. + * @param [in] globalWorkSizeX X grid dimension specified in work-items. + * @param [in] globalWorkSizeY Y grid dimension specified in work-items. + * @param [in] globalWorkSizeZ Z grid dimension specified in work-items. + * @param [in] localWorkSizeX X block dimension specified in work-items. + * @param [in] localWorkSizeY Y block dimension specified in work-items. + * @param [in] localWorkSizeZ Z block dimension specified in work-items. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. 
+ * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] hStream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] kernelParams pointer to kernel parameters. + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * All passed arguments must be naturally aligned according to their type. The memory address of + * each argument should be a multiple of its size in bytes. Please refer to + * hip_porting_driver_api.md for sample usage. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue. + * + * HIP/ROCm actually updates the start event when the associated kernel completes. + * Currently, timing between startEvent and stopEvent does not include the time it takes to perform + * a system scope release/cache flush - only the time it takes to issue writes to cache. + * + * @note For this HIP API, the flag 'hipExtAnyOrderLaunch' is not supported on AMD GFX9xx boards.
+ * + */ +HIP_PUBLIC_API +extern "C" hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent __dparm(NULL), + hipEvent_t stopEvent __dparm(NULL), + uint32_t flags __dparm(0)); +/** + * @brief This HIP API is deprecated, please use hipExtModuleLaunchKernel() instead. + * + */ +HIP_DEPRECATED("use hipExtModuleLaunchKernel instead") +HIP_PUBLIC_API +extern "C" hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent __dparm(NULL), + hipEvent_t stopEvent __dparm(NULL)); + +#if defined(__cplusplus) + +/** + * @brief Launches kernel from the pointer address, with arguments and shared memory on stream. + * + * @param [in] function_address pointer to the Kernel to launch. + * @param [in] numBlocks number of blocks. + * @param [in] dimBlocks dimension of a block. + * @param [in] args pointer to kernel arguments. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. 
The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue. + * + */ +extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags); + +/** + * @brief Launches kernel with dimension parameters and shared memory on stream with templated + * kernel and arguments. + * + * @param [in] kernel Kernel to launch. + * @param [in] numBlocks const number of blocks. + * @param [in] dimBlocks const dimension of a block. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @param [in] args templated kernel arguments. + * + */ +template +inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags, + Args...
args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast(kernel); + hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent, stopEvent, + (int)flags); +} + +#endif // defined(__cplusplus) + +// doxygen end AMD-specific features +/** + * @} + */ +#endif // #ifndef HIP_INCLUDE_HIP_HIP_EXT_H diff --git a/external/hip/hip_ext_ocp.h b/external/hip/hip_ext_ocp.h new file mode 100644 index 0000000..1037525 --- /dev/null +++ b/external/hip/hip_ext_ocp.h @@ -0,0 +1,31 @@ +/* +Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_EXT_OCP_H +#define HIP_INCLUDE_HIP_HIP_EXT_OCP_H + +#include + +#include +#include + +#endif // HIP_INCLUDE_HIP_HIP_EXT_OCP_H diff --git a/external/hip/hip_fp16.h b/external/hip/hip_fp16.h new file mode 100644 index 0000000..bf60a3b --- /dev/null +++ b/external/hip/hip_fp16.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP16_H +#define HIP_INCLUDE_HIP_HIP_FP16_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#define HIPRT_INF_FP16 CUDART_INF_FP16 +#define HIPRT_MAX_NORMAL_FP16 CUDART_MAX_NORMAL_FP16 +#define HIPRT_MIN_DENORM_FP16 CUDART_MIN_DENORM_FP16 +#define HIPRT_NAN_FP16 CUDART_NAN_FP16 +#define HIPRT_NEG_ZERO_FP16 CUDART_NEG_ZERO_FP16 +#define HIPRT_ONE_FP16 CUDART_ONE_FP16 +#define HIPRT_ZERO_FP16 CUDART_ZERO_FP16 + +#include "cuda_fp16.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hip_fp4.h b/external/hip/hip_fp4.h new file mode 100644 index 0000000..59fb5da --- /dev/null +++ b/external/hip/hip_fp4.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP4_H +#define HIP_INCLUDE_HIP_HIP_FP4_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP4_H diff --git a/external/hip/hip_fp6.h b/external/hip/hip_fp6.h new file mode 100644 index 0000000..72d642a --- /dev/null +++ b/external/hip/hip_fp6.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP6_H +#define HIP_INCLUDE_HIP_HIP_FP6_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP6_H diff --git a/external/hip/hip_fp8.h b/external/hip/hip_fp8.h new file mode 100644 index 0000000..82f47af --- /dev/null +++ b/external/hip/hip_fp8.h @@ -0,0 +1,33 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_FP8_H +#define HIP_INCLUDE_HIP_HIP_FP8_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +// We only have fnuz defs for now, which are not supported by other platforms +#include +#endif + +#endif // HIP_INCLUDE_HIP_HIP_FP8_H diff --git a/external/hip/hip_gl_interop.h b/external/hip/hip_gl_interop.h new file mode 100644 index 0000000..8af6ec3 --- /dev/null +++ b/external/hip/hip_gl_interop.h @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIP_GL_INTEROP_H +#define HIP_GL_INTEROP_H + +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/amd_detail/amd_hip_gl_interop.h" +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_gl_interop.h" +#endif +#endif diff --git a/external/hip/hip_hcc.h b/external/hip/hip_hcc.h new file mode 100644 index 0000000..9e0cfad --- /dev/null +++ b/external/hip/hip_hcc.h @@ -0,0 +1,24 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_HCC_H +#define HIP_INCLUDE_HIP_HIP_HCC_H +#warning "hip/hip_hcc.h is deprecated, please use hip/hip_ext.h" +#include "hip/hip_ext.h" +#endif // #ifndef HIP_INCLUDE_HIP_HIP_HCC_H diff --git a/external/hip/hip_math_constants.h b/external/hip/hip_math_constants.h new file mode 100644 index 0000000..269767e --- /dev/null +++ b/external/hip/hip_math_constants.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIP_MATH_CONSTANTS_H +#define HIP_MATH_CONSTANTS_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/amd_detail/amd_hip_math_constants.h" +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_math_constants.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif +#endif diff --git a/external/hip/hip_profile.h b/external/hip/hip_profile.h new file mode 100644 index 0000000..4fef521 --- /dev/null +++ b/external/hip/hip_profile.h @@ -0,0 +1,27 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HIP_PROFILE_H +#define HIP_INCLUDE_HIP_HIP_PROFILE_H + +#define HIP_SCOPED_MARKER(markerName, group) +#define HIP_BEGIN_MARKER(markerName, group) +#define HIP_END_MARKER() + +#endif diff --git a/external/hip/hip_runtime.h b/external/hip/hip_runtime.h new file mode 100644 index 0000000..7834d0e --- /dev/null +++ b/external/hip/hip_runtime.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2015 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! HIP = Heterogeneous-compute Interface for Portability +//! +//! Defines an extremely thin runtime layer that allows source code to be compiled unmodified +//! through either AMD CLANG or NVCC. Key features tend to be in the spirit +//! and terminology of CUDA, but with a portable path to other accelerators as well: +// +//! Both paths support rich C++ features including classes, templates, lambdas, etc. +//! Runtime API is C +//! Memory management is based on pure pointers and resembles malloc/free/copy. +// +//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch +//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++ +//! features.
+ +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_H + +#if !defined(__HIPCC_RTC__) +// Some standard header files, these are included by hc.hpp and so want to make them avail on both +// paths to provide a consistent include env and avoid "missing symbol" errors that only appears +// on NVCC path: +#if __cplusplus +#include +#include +#else +#include +#include +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) + +#include +#include + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) +#include + +#endif diff --git a/external/hip/hip_runtime_api.h b/external/hip/hip_runtime_api.h new file mode 100644 index 0000000..5148481 --- /dev/null +++ b/external/hip/hip_runtime_api.h @@ -0,0 +1,10431 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + +* @file hip_runtime_api.h + * + * @brief Defines the API signatures for HIP runtime. + * This file can be compiled with a standard compiler. + */ + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_API_H + +#if __cplusplus +#include +#include +#include +#include +#else +#include +#include +#include +#endif + +#include +#include +#include + +enum { + HIP_SUCCESS = 0, + HIP_ERROR_INVALID_VALUE, + HIP_ERROR_NOT_INITIALIZED, + HIP_ERROR_LAUNCH_OUT_OF_RESOURCES +}; +// hack to get these to show up in Doxygen: +/** + * @defgroup GlobalDefs Global enum and defines + * @{ + * + */ +/** + * hipDeviceArch_t + * + */ +typedef struct { + // 32-bit Atomics + unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory. + unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory. + unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory. + unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory. + unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory. + + // 64-bit Atomics + unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory. + unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory. + + // Doubles + unsigned hasDoubles : 1; ///< Double-precision floating point. + + // Warp cross-lane operations + unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all). + unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot). + unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*). 
+ unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps. + + // Sync + unsigned hasThreadFenceSystem : 1; ///< __threadfence_system. + unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or. + + // Misc + unsigned hasSurfaceFuncs : 1; ///< Surface functions. + unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D). + unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism. +} hipDeviceArch_t; + +typedef struct hipUUID_t { + char bytes[16]; +} hipUUID; + +//--- +// Common headers for both NVCC and HIP-Clang paths: + +#define hipGetDeviceProperties hipGetDevicePropertiesR0600 +#define hipDeviceProp_t hipDeviceProp_tR0600 +#define hipChooseDevice hipChooseDeviceR0600 + +/** + * hipDeviceProp + * + */ +typedef struct hipDeviceProp_t { + char name[256]; ///< Device name. + hipUUID uuid; ///< UUID of a device + char luid[8]; ///< 8-byte unique identifier. Only valid on windows + unsigned int luidDeviceNodeMask; ///< LUID node mask + size_t totalGlobalMem; ///< Size of global memory region (in bytes). + size_t sharedMemPerBlock; ///< Size of shared memory per block (in bytes). + int regsPerBlock; ///< Registers per block. + int warpSize; ///< Warp size. + size_t memPitch; ///< Maximum pitch in bytes allowed by memory copies + ///< pitched memory + int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size. + int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block. + int maxGridSize[3]; ///< Max grid dimensions (XYZ). + int clockRate; ///< Max clock frequency of the multiProcessors in khz. + size_t totalConstMem; ///< Size of shared constant memory region on the device + ///< (in bytes). + int major; ///< Major compute capability version. This indicates the core instruction set + ///< of the GPU architecture. For example, a value of 11 would correspond to + ///< Navi III (RDNA3). 
See the arch feature flags for portable ways to query + ///< feature caps. + int minor; ///< Minor compute capability version. This indicates a particular configuration, + ///< feature set, or variation within the group represented by the major compute + ///< capability version. For example, different models within the same major version + ///< might have varying levels of support for certain features or optimizations. + ///< See the arch feature flags for portable ways to query feature caps. + size_t textureAlignment; ///< Alignment requirement for textures + size_t texturePitchAlignment; ///< Pitch alignment requirement for texture references bound to pitched memory + int deviceOverlap; ///< Deprecated. Use asyncEngineCount instead + int multiProcessorCount; ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + int kernelExecTimeoutEnabled; ///< Run time limit for kernels executed on the device + int integrated; ///< APU vs dGPU + int canMapHostMemory; ///< Check whether HIP can map host memory + int computeMode; ///< Compute mode.
+ int maxTexture1D; ///< Maximum number of elements in 1D images + int maxTexture1DMipmap; ///< Maximum 1D mipmap texture size + int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory + int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements + int maxTexture2DMipmap[2]; ///< Maximum number of elements in 2D array mipmap of images + int maxTexture2DLinear[3]; ///< Maximum 2D tex dimensions if tex are bound to pitched memory + int maxTexture2DGather[2]; ///< Maximum 2D tex dimensions if gather has to be performed + int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image + ///< elements + int maxTexture3DAlt[3]; ///< Maximum alternate 3D texture dims + int maxTextureCubemap; ///< Maximum cubemap texture dims + int maxTexture1DLayered[2]; ///< Maximum number of elements in 1D array images + int maxTexture2DLayered[3]; ///< Maximum number of elements in 2D array images + int maxTextureCubemapLayered[2]; ///< Maximum cubemaps layered texture dims + int maxSurface1D; ///< Maximum 1D surface size + int maxSurface2D[2]; ///< Maximum 2D surface size + int maxSurface3D[3]; ///< Maximum 3D surface size + int maxSurface1DLayered[2]; ///< Maximum 1D layered surface size + int maxSurface2DLayered[3]; ///< Maximum 2D layered surface size + int maxSurfaceCubemap; ///< Maximum cubemap surface size + int maxSurfaceCubemapLayered[2]; ///< Maximum cubemap layered surface size + size_t surfaceAlignment; ///< Alignment requirement for surface + int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int ECCEnabled; ///< Device has ECC support enabled + int pciBusID; ///< PCI Bus ID.
+ int pciDeviceID; ///< PCI Device ID + int pciDomainID; ///< PCI Domain ID + int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0 + int asyncEngineCount; ///< Number of async engines + int unifiedAddressing; ///< Does device and host share unified address space + int memoryClockRate; ///< Max global memory clock frequency in khz. + int memoryBusWidth; ///< Global memory bus width in bits. + int l2CacheSize; ///< L2 cache size. + int persistingL2CacheMaxSize; ///< Device's max L2 persisting lines in bytes + int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor. + int streamPrioritiesSupported; ///< Device supports stream priority + int globalL1CacheSupported; ///< Indicates globals are cached in L1 + int localL1CacheSupported; ///< Locals are cached in L1 + size_t sharedMemPerMultiprocessor; ///< Amount of shared memory available per multiprocessor. + int regsPerMultiprocessor; ///< registers available per multiprocessor + int managedMemory; ///< Device supports allocating managed memory on this system + int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int multiGpuBoardGroupID; ///< Unique identifier for a group of devices on same multiboard GPU + int hostNativeAtomicSupported; ///< Link between host and device supports native atomics + int singleToDoublePrecisionPerfRatio; ///< Deprecated. CUDA only.
+ int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with + ///< the CPU + int computePreemptionSupported; ///< Is compute preemption supported on the device + int canUseHostPointerForRegisteredMem; ///< Device can access host registered memory with same + ///< address as the host + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple + ///< devices + size_t sharedMemPerBlockOptin; ///< Per device max shared mem per block usable by special opt in + int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's + ///< page tables + int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device + ///< without migration + int maxBlocksPerMultiProcessor; ///< Max number of blocks on CU + int accessPolicyMaxWindowSize; ///< Max value of access policy window + size_t reservedSharedMemPerBlock; ///< Shared memory reserved by driver per block + int hostRegisterSupported; ///< Device supports hipHostRegister + int sparseHipArraySupported; ///< Indicates if device supports sparse hip arrays + int hostRegisterReadOnlySupported; ///< Device supports using the hipHostRegisterReadOnly flag + ///< with hipHostRegister + int timelineSemaphoreInteropSupported; ///< Indicates external timeline semaphore support + int memoryPoolsSupported; ///< Indicates if device supports hipMallocAsync and hipMemPool APIs + int gpuDirectRDMASupported; ///< Indicates device support of RDMA APIs + unsigned int gpuDirectRDMAFlushWritesOptions; ///< Bitmask to be interpreted according to + ///< hipFlushGPUDirectRDMAWritesOptions + int gpuDirectRDMAWritesOrdering; ///< value of hipGPUDirectRDMAWritesOrdering + unsigned int + memoryPoolSupportedHandleTypes; ///<
Bitmask of handle types support with mempool based IPC + int deferredMappingHipArraySupported; ///< Device supports deferred mapping HIP arrays and HIP + ///< mipmapped arrays + int ipcEventSupported; ///< Device supports IPC events + int clusterLaunch; ///< Device supports cluster launch + int unifiedFunctionPointers; ///< Indicates device supports unified function pointers + int reserved[63]; ///< CUDA Reserved. + + int hipReserved[32]; ///< Reserved for adding new entries for HIP/CUDA. + + /* HIP Only struct members */ + char gcnArchName[256]; ///< AMD GCN Arch Name. HIP Only. + size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per CU. HIP Only. + int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" + ///< instructions. New for HIP. + hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. + unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register + unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register + int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched functions + int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched grid dimensions + int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched block dimensions + int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on + ///< multiple + /// devices with unmatched shared memories + int isLargeBar; ///< 1: if it is a large PCI bar device, else 0 + int asicRevision; ///< Revision of the GPU in this device +} hipDeviceProp_t; + +/** + * hipMemoryType (for pointer attributes) + * + * @note hipMemoryType enum values are combination of cudaMemoryType and cuMemoryType and AMD + * specific enum values.
+ * + */ +typedef enum hipMemoryType { + hipMemoryTypeUnregistered = 0, ///< Unregistered memory + hipMemoryTypeHost = 1, ///< Memory is physically located on host + hipMemoryTypeDevice = 2, ///< Memory is physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeManaged = 3, ///< Managed memory, automatically managed by the unified + ///< memory system + ///< place holder for new values. + hipMemoryTypeArray = 10, ///< Array memory, physically located on device. (see deviceId for + ///< specific device) + hipMemoryTypeUnified = 11 ///< unified address space + +} hipMemoryType; + +/** + * Pointer attributes + */ +typedef struct hipPointerAttribute_t { + enum hipMemoryType type; + int device; + void* devicePointer; + void* hostPointer; + int isManaged; + unsigned allocationFlags; /* flags specified when memory was allocated*/ + /* peers? */ +} hipPointerAttribute_t; + +// Ignoring error-code return values from hip APIs is discouraged. On C++17, +// we can make that yield a warning +#if __cplusplus >= 201703L +#define __HIP_NODISCARD [[nodiscard]] +#else +#define __HIP_NODISCARD +#endif + +/** + * HIP error type + * + */ +// Developer note - when updating these, update the hipErrorName and hipErrorString functions in +// NVCC and HIP-Clang paths Also update the hipCUDAErrorTohipError function in NVCC path. + +typedef enum __HIP_NODISCARD hipError_t { + hipSuccess = 0, ///< Successful completion. + hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL + ///< or not in an acceptable range. + hipErrorOutOfMemory = 2, ///< out of memory range. + // Deprecated + hipErrorMemoryAllocation = 2, ///< Memory allocation error.
+ hipErrorNotInitialized = 3, ///< Not initialized + // Deprecated + hipErrorInitializationError = 3, + hipErrorDeinitialized = 4, ///< Deinitialized + hipErrorProfilerDisabled = 5, + hipErrorProfilerNotInitialized = 6, + hipErrorProfilerAlreadyStarted = 7, + hipErrorProfilerAlreadyStopped = 8, + hipErrorInvalidConfiguration = 9, ///< Invalid configuration + hipErrorInvalidPitchValue = 12, ///< Invalid pitch value + hipErrorInvalidSymbol = 13, ///< Invalid symbol + hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer + hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction + hipErrorInsufficientDriver = 35, + hipErrorMissingConfiguration = 52, + hipErrorPriorLaunchFailure = 53, + hipErrorInvalidDeviceFunction = 98, ///< Invalid device function + hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices + hipErrorInvalidDevice = 101, ///< DeviceID must be in range from 0 to compute-devices. + hipErrorInvalidImage = 200, ///< Invalid image + hipErrorInvalidContext = 201, ///< Produced when input context is invalid. + hipErrorContextAlreadyCurrent = 202, + hipErrorMapFailed = 205, + // Deprecated + hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr. + hipErrorUnmapFailed = 206, + hipErrorArrayIsMapped = 207, + hipErrorAlreadyMapped = 208, + hipErrorNoBinaryForGpu = 209, + hipErrorAlreadyAcquired = 210, + hipErrorNotMapped = 211, + hipErrorNotMappedAsArray = 212, + hipErrorNotMappedAsPointer = 213, + hipErrorECCNotCorrectable = 214, + hipErrorUnsupportedLimit = 215, ///< Unsupported limit + hipErrorContextAlreadyInUse = 216, ///< The context is already in use + hipErrorPeerAccessUnsupported = 217, + hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX + hipErrorInvalidGraphicsContext = 219, + hipErrorInvalidSource = 300, ///< Invalid source. + hipErrorFileNotFound = 301, ///< the file is not found.
+ hipErrorSharedObjectSymbolNotFound = 302, + hipErrorSharedObjectInitFailed = 303, ///< Failed to initialize shared object. + hipErrorOperatingSystem = 304, ///< Not the correct operating system + hipErrorInvalidHandle = 400, ///< Invalid handle + // Deprecated + hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid. + hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation. + hipErrorNotFound = 500, ///< Not found + hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not + ///< ready. This is not actually an error, but is used to distinguish + ///< from hipSuccess (which indicates completion). APIs that return + ///< this error include hipEventQuery and hipStreamQuery. + hipErrorIllegalAddress = 700, + hipErrorLaunchOutOfResources = 701, ///< Out of resources error. + hipErrorLaunchTimeOut = 702, ///< Timeout for the launch. + hipErrorPeerAccessAlreadyEnabled = 704, ///< Peer access was already enabled from the current + ///< device. + hipErrorPeerAccessNotEnabled = 705, ///< Peer access was never enabled from the current device. + hipErrorSetOnActiveProcess = 708, ///< The process is active. + hipErrorContextIsDestroyed = 709, ///< The context is already destroyed + hipErrorAssert = 710, ///< Produced when the kernel calls assert. + hipErrorHostMemoryAlreadyRegistered = 712, ///< Produced when trying to lock a page-locked + ///< memory. + hipErrorHostMemoryNotRegistered = 713, ///< Produced when trying to unlock a non-page-locked + ///< memory. + hipErrorLaunchFailure = 719, ///< An exception occurred on the device while executing a kernel. + hipErrorCooperativeLaunchTooLarge = 720, ///< This error indicates that the number of blocks + ///< launched per grid for a kernel that was launched + ///< via cooperative launch APIs exceeds the maximum + ///< number of allowed blocks for the current device.
+ hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented + hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream + ///< is capturing. + hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream + ///< has been invalidated due to a previous error. + hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of + ///< two independent capture sequences. + hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream. + hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not + ///< joined to the primary stream. + hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses + ///< the capture sequence boundary. Only implicit + ///< in-stream ordering dependencies are allowed + ///< to cross the boundary + hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed + ///< implicit dependency on a current capture sequence + ///< from hipStreamLegacy. + hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last + ///< recorded in a capturing stream. + hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with + ///< the hipStreamCaptureModeRelaxed argument to + ///< hipStreamBeginCapture was passed to + ///< hipStreamEndCapture in a different thread. + hipErrorGraphExecUpdateFailure = 910, ///< This error indicates that the graph update + ///< was not performed because it included changes which + ///< violated constraints specific to instantiated graph + ///< update. + hipErrorInvalidChannelDescriptor = 911, ///< Invalid channel descriptor. + hipErrorInvalidTexture = 912, ///< Invalid texture. + hipErrorUnknown = 999, ///< Unknown error. + // HSA Runtime Error Codes start here. + hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error.
Typically not seen + ///< in production systems. + hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically + ///< not seen in production systems. + hipErrorTbd ///< Marker that more error codes are needed. +} hipError_t; + +#undef __HIP_NODISCARD + +/** + * hipDeviceAttribute_t + * hipDeviceAttributeUnused number: 5 + */ +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeCudaCompatibleBegin = 0, + + hipDeviceAttributeEccEnabled = + hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. + hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window + ///< policy in bytes. + hipDeviceAttributeAsyncEngineCount, ///< Asynchronous engines number. + hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address + ///< space + hipDeviceAttributeCanUseHostPointerForRegisteredMem, ///< Device can access host registered + ///< memory at the same virtual address as + ///< the CPU + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeComputePreemptionSupported, ///< Device supports Compute Preemption. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels + ///< concurrently. + hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory + ///< concurrently with the CPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple + ///< devices + hipDeviceAttributeDeviceOverlap, ///< Device can concurrently copy memory and execute a kernel. + ///< Deprecated. Use instead asyncEngineCount. 
+ hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on + ///< the device without migration + hipDeviceAttributeGlobalL1CacheSupported, ///< Device supports caching globals in L1 + hipDeviceAttributeHostNativeAtomicSupported, ///< Link between the device and the host supports + ///< native atomic operations + hipDeviceAttributeIntegrated, ///< Device is integrated GPU + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. + hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 + ///< cache. + hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported + hipDeviceAttributeLuid, ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeLuidDeviceNodeMask, ///< Luid device node mask. Undefined on TCC and + ///< non-Windows platforms + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system + hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Max block size per multiprocessor + hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. + hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. + hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. + hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. + hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. + hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. + hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. + hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. + hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. + hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. 
Maximum dimensions of 2D layered surface. + hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. + hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. + hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. Maximum dimension of Cubemap layered + ///< surface. + hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. + hipDeviceAttributeMaxTexture1DLayered, ///< Maximum dimensions of 1D layered texture. + hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear + ///< texture. Use cudaDeviceGetTexture1DLinearMaxWidth() + ///< instead on Cuda. + hipDeviceAttributeMaxTexture1DMipmap, ///< Maximum size of 1D mipmapped texture. + hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. + hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D texture. + hipDeviceAttributeMaxTexture2DGather, ///< Maximum dimensions of 2D texture if gather operations + ///< performed. + hipDeviceAttributeMaxTexture2DLayered, ///< Maximum dimensions of 2D layered texture. + hipDeviceAttributeMaxTexture2DLinear, ///< Maximum dimensions (width, height, pitch) of 2D + ///< textures bound to pitched memory. + hipDeviceAttributeMaxTexture2DMipmap, ///< Maximum dimensions of 2D mipmapped texture. + hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. + hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. + hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. + hipDeviceAttributeMaxTexture3DAlt, ///< Maximum dimensions of alternate 3D texture. + hipDeviceAttributeMaxTextureCubemap, ///< Maximum dimensions of Cubemap texture + hipDeviceAttributeMaxTextureCubemapLayered, ///< Maximum dimensions of Cubemap layered texture.
+ hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor. + hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeMultiGpuBoardGroupID, ///< Unique ID of device group on the same multi-GPU + ///< board + hipDeviceAttributeMultiprocessorCount, ///< Number of multi-processors. When the GPU works in Compute + ///< Unit (CU) mode, this value equals the number of CUs; + ///< when in Workgroup Processor (WGP) mode, this value equals + ///< half of CUs, because a single WGP contains two CUs. + hipDeviceAttributeUnused1, ///< Previously hipDeviceAttributeName + hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory + ///< via the host's page tables + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. Returns pcie slot id + hipDeviceAttributePciDomainId, ///< PCI Domain Id. + hipDeviceAttributePciDomainID = + hipDeviceAttributePciDomainId, ///< PCI Domain ID, for backward compatibility. + hipDeviceAttributePersistingL2CacheMaxSize, ///< Maximum l2 persisting lines capacity in bytes + hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This + ///< number is shared by all thread blocks simultaneously + ///< resident on a multiprocessor. + hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per multiprocessor.
+ hipDeviceAttributeReservedSharedMemPerBlock, ///< Shared memory reserved by CUDA driver per + ///< block. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in + ///< bytes. + hipDeviceAttributeSharedMemPerBlockOptin, ///< Maximum shared memory per block usable by special + ///< opt in. + hipDeviceAttributeSharedMemPerMultiprocessor, ///< Shared memory available per multiprocessor. + hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single + ///< precision to double precision. + hipDeviceAttributeStreamPrioritiesSupported, ///< Whether to support stream priorities. + hipDeviceAttributeSurfaceAlignment, ///< Alignment requirement for surfaces + hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver + hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures + hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture + ///< references bound to pitched memory; + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeTotalGlobalMem, ///< Global memory available on device. + hipDeviceAttributeUnifiedAddressing, ///< Cuda only. A unified address space shared with the + ///< host. + hipDeviceAttributeUnused2, ///< Previously hipDeviceAttributeUuid + hipDeviceAttributeWarpSize, ///< Warp size in threads.
+ hipDeviceAttributeMemoryPoolsSupported, ///< Device supports HIP Stream Ordered Memory Allocator + hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory + ///< management + hipDeviceAttributeHostRegisterSupported, ///< Can device support host memory registration via + ///< hipHostRegister + hipDeviceAttributeMemoryPoolSupportedHandleTypes, ///< Supported handle mask for HIP Stream + ///< Ordered Memory Allocator + hipDeviceAttributeHostNumaId, ///< NUMA ID of the cpu node closest to the device, + ///< or -1 when NUMA isn't supported + + hipDeviceAttributeCudaCompatibleEnd = 9999, + hipDeviceAttributeAmdSpecificBegin = 10000, + + hipDeviceAttributeClockInstructionRate = + hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the + ///< device-side "clock*" + hipDeviceAttributeUnused3, ///< Previously hipDeviceAttributeArch + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory PerMultiprocessor. + hipDeviceAttributeUnused4, ///< Previously hipDeviceAttributeGcnArch + hipDeviceAttributeUnused5, ///< Previously hipDeviceAttributeGcnArchName + hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< functions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< grid dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< block dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on + ///< multiple devices with unmatched + ///< shared memories + hipDeviceAttributeIsLargeBar, ///< 
Whether it is LargeBar + hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device + hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and + ///< hipStreamWaitValue64(), '0' otherwise. + hipDeviceAttributeImageSupport, ///< '1' if Device supports image, '0' otherwise. + hipDeviceAttributePhysicalMultiProcessorCount, ///< All available physical compute + ///< units for the device + hipDeviceAttributeFineGrainSupport, ///< '1' if Device supports fine grain, '0' otherwise + hipDeviceAttributeWallClockRate, ///< Constant frequency of wall clock in kilohertz. + hipDeviceAttributeNumberOfXccs, ///< The number of XCC(s) on the device + hipDeviceAttributeMaxAvailableVgprsPerThread, ///< Max number of available (directly or + ///< indirectly addressable) VGPRs per thread in + ///< DWORDs. + hipDeviceAttributePciChipId, ///< GPU Manufacturer device id + hipDeviceAttributeExpertSchedMode, ///< '1' if Device supports expert scheduling mode, + ///< '0' otherwise. + + hipDeviceAttributeAmdSpecificEnd = 19999, + hipDeviceAttributeVendorSpecificBegin = 20000, + // Extended attributes for vendors +} hipDeviceAttribute_t; + +// Flags that can be used with hipGetProcAddress. +/** Default flag. Equivalent to HIP_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM if compiled with + * -fgpu-default-stream=per-thread flag or HIP_API_PER_THREAD_DEFAULT_STREAM macro is + * defined.*/ +#define HIP_GET_PROC_ADDRESS_DEFAULT 0x0 + +/** Search for all symbols except the corresponding per-thread versions.*/ +#define HIP_GET_PROC_ADDRESS_LEGACY_STREAM 0x1 + +/** Search for all symbols including the per-thread versions. 
If a per-thread version cannot be + * found, returns the legacy version.*/ +#define HIP_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM 0x2 + +typedef enum hipDriverProcAddressQueryResult { + HIP_GET_PROC_ADDRESS_SUCCESS = 0, + HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1, + HIP_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2 +} hipDriverProcAddressQueryResult; + +enum hipComputeMode { + hipComputeModeDefault = 0, + hipComputeModeExclusive = 1, + hipComputeModeProhibited = 2, + hipComputeModeExclusiveProcess = 3 +}; + +enum hipFlushGPUDirectRDMAWritesOptions { + hipFlushGPUDirectRDMAWritesOptionHost = 1 << 0, + hipFlushGPUDirectRDMAWritesOptionMemOps = 1 << 1 +}; + +enum hipGPUDirectRDMAWritesOrdering { + hipGPUDirectRDMAWritesOrderingNone = 0, + hipGPUDirectRDMAWritesOrderingOwner = 100, + hipGPUDirectRDMAWritesOrderingAllDevices = 200 +}; + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif +#include +#include +#include +#include +#if defined(_MSC_VER) +#define HIP_DEPRECATED(msg) __declspec(deprecated(msg)) +#else // !defined(_MSC_VER) +#define HIP_DEPRECATED(msg) __attribute__((deprecated(msg))) +#endif // !defined(_MSC_VER) +#define HIP_DEPRECATED_MSG \ + "This API is marked as deprecated and might not be supported in future releases. 
For more " \ + "details please refer " \ + "https://github.com/ROCm/HIP/blob/develop/docs/reference/deprecated_api_list.md" +#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) +#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) +#define HIP_LAUNCH_PARAM_END ((void*)0x03) +#ifdef __cplusplus +#define __dparm(x) = x +#else +#define __dparm(x) +#endif +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#ifdef __cplusplus +namespace hip_impl { +hipError_t hip_init(); +} // namespace hip_impl +#endif +// Structure definitions: +#ifdef __cplusplus +extern "C" { +#endif +//--- +// API-visible structures +typedef struct ihipCtx_t* hipCtx_t; +// Note many APIs also use integer deviceIds as an alternative to the device pointer: +typedef int hipDevice_t; +typedef enum hipDeviceP2PAttr { + hipDevP2PAttrPerformanceRank = 0, + hipDevP2PAttrAccessSupported, + hipDevP2PAttrNativeAtomicSupported, + hipDevP2PAttrHipArrayAccessSupported +} hipDeviceP2PAttr; +typedef enum hipDriverEntryPointQueryResult { + hipDriverEntryPointSuccess = 0, + hipDriverEntryPointSymbolNotFound = 1, + hipDriverEntryPointVersionNotSufficent = 2 +} hipDriverEntryPointQueryResult; +typedef struct ihipStream_t* hipStream_t; +#define hipIpcMemLazyEnablePeerAccess 0x01 +#define HIP_IPC_HANDLE_SIZE 64 +typedef struct hipIpcMemHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcMemHandle_t; +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; +typedef struct ihipModule_t* hipModule_t; +typedef struct ihipModuleSymbol_t* hipFunction_t; +typedef struct ihipLinkState_t* hipLinkState_t; +typedef struct ihipLibrary_t* hipLibrary_t; +typedef struct ihipKernel_t* hipKernel_t; +/** + * HIP memory pool + */ +typedef struct ihipMemPoolHandle_t* hipMemPool_t; + +typedef struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int 
numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +} hipFuncAttributes; +typedef struct ihipEvent_t* hipEvent_t; + +/** + * hipLimit + * + * @note In HIP device limit-related APIs, any input limit value other than those defined in the + * enum is treated as "UnsupportedLimit" by default. + */ +enum hipLimit_t { + hipLimitStackSize = 0x0, ///< Limit of stack size in bytes on the current device, per + ///< thread. The size is in units of 256 dwords, up to the + ///< limit of (128K - 16) + hipLimitPrintfFifoSize = 0x01, ///< Size limit in bytes of fifo used by printf call on the + ///< device. Currently not supported + hipLimitMallocHeapSize = 0x02, ///< Limit of heap size in bytes on the current device, should + ///< be less than the global memory size on the device + hipExtLimitScratchMin = 0x1000, ///< Minimum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchMax = 0x1001, ///< Maximum allowed value in bytes for scratch limit on this + ///< device. Valid only on Rocm device. This is read only. + hipExtLimitScratchCurrent = 0x1002, ///< Current scratch limit threshold in bytes on this + ///< device. Must be between hipExtLimitScratchMin and + ///< hipExtLimitScratchMaxValid values. Valid only on Rocm + ///< device. This can be modified. + hipLimitRange ///< Supported limit range +}; + +/** + * Flags that can be used with hipStreamCreateWithFlags. + */ +// Flags that can be used with hipStreamCreateWithFlags. +/** Default stream creation flags. These are used with hipStreamCreate().*/ +#define hipStreamDefault 0x00 + +/** Stream does not implicitly synchronize with null stream.*/ +#define hipStreamNonBlocking 0x01 + +// Flags that can be used with hipEventCreateWithFlags. +/** Default flags.*/ +#define hipEventDefault 0x0 + +/** Waiting will yield CPU. 
Power-friendly and usage-friendly but may increase latency.*/ +#define hipEventBlockingSync 0x1 + +/** Disable event's capability to record timing information. May improve performance.*/ +#define hipEventDisableTiming 0x2 + +/** Event can support IPC. hipEventDisableTiming also must be set.*/ +#define hipEventInterprocess 0x4 + +// Flags that can be used with hipEventRecordWithFlags. +/** Default flag. */ +#define hipEventRecordDefault 0x00 + +/** Event is captured in the graph as an external event node when performing stream capture. */ +#define hipEventRecordExternal 0x01 + +//Flags that can be used with hipStreamWaitEvent. +/** Default flag. */ +#define hipEventWaitDefault 0x00 + +/** Wait is captured in the graph as an external event node when performing stream capture. */ +#define hipEventWaitExternal 0x01 + +/** Disable performing a system scope sequentially consistent memory fence when the event + * transitions from recording to recorded. This can be used for events that are only being + * used to measure timing, and do not require the event inspection operations + * (see ::hipEventSynchronize, ::hipEventQuery, and ::hipEventElapsedTime) to synchronize-with + * the work on which the recorded event (see ::hipEventRecord) is waiting. + * On some AMD GPU devices this can improve the accuracy of timing measurements by avoiding the + * cost of cache writeback and invalidation, and the performance impact of those actions on the + * execution of following work. */ +#define hipEventDisableSystemFence 0x20000000 + +/** Use a device-scope release when recording this event. This flag is useful to obtain more + * precise timings of commands between events. The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToDevice 0x40000000 + +/** Use a system-scope release when recording this event. This flag is useful to make + * non-coherent host memory visible to the host. 
The flag is a no-op on CUDA platforms.*/ +#define hipEventReleaseToSystem 0x80000000 + +// Flags that can be used with hipGetDriverEntryPoint. +/** Default flag. Equivalent to hipEnablePerThreadDefaultStream if compiled with + * -fgpu-default-stream=per-thread flag or HIP_API_PER_THREAD_DEFAULT_STREAM macro is + * defined.*/ +#define hipEnableDefault 0x0 + +/** Search for all symbols except the corresponding per-thread versions.*/ +#define hipEnableLegacyStream 0x1 + +/** Search for all symbols including the per-thread versions. If a per-thread version cannot be + * found, returns the legacy version.*/ +#define hipEnablePerThreadDefaultStream 0x2 + +// Flags that can be used with hipHostMalloc/hipHostAlloc. +/** Default pinned memory allocation on the host.*/ +#define hipHostAllocDefault 0x0 + +/** Default pinned memory allocation on the host. + * @note This is the same definition as #hipHostAllocDefault.*/ +#define hipHostMallocDefault 0x0 + +/** Memory is considered allocated by all contexts.*/ +#define hipHostAllocPortable 0x1 + +/** Memory is considered allocated by all contexts. + * @note This is the same definition as #hipHostAllocPortable.*/ +#define hipHostMallocPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostAllocMapped 0x2 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer. + * @note This is the same definition as #hipHostAllocMapped.*/ +#define hipHostMallocMapped 0x2 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory.
+ * @note This flag is only for CUDA source compatibility but not functional within HIP runtime, + * because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostAllocWriteCombined 0x4 + +/** Allocates the memory as write-combined. On some system configurations, write-combined allocation + * may be transferred faster across the PCI Express bus, however, could have low read efficiency by + * most CPUs. It's a good option for data transfer from host to device via mapped pinned memory. + * @note This flag is the same definition as #hipHostAllocWriteCombined which is equivalent to + * cudaHostAllocWriteCombined. It is only for CUDA source compatibility but not functional within + * HIP runtime, because the allocation path is currently not supported on the AMD platform.*/ +#define hipHostMallocWriteCombined 0x4 + +/** + * Host memory will be forcedly allocated on extended fine grained system memory + * pool which is with MTYPE_UC. + * @note This allocation flag is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipHostMallocUncached 0x10000000 +#define hipHostAllocUncached hipHostMallocUncached + +/** + * Host memory allocation will follow numa policy set by user. + * @note This numa allocation flag is applicable on Linux, under development on Windows. + */ +#define hipHostMallocNumaUser 0x20000000 + +/** Allocate coherent memory. Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocCoherent 0x40000000 + +/** Allocate non-coherent memory. 
Overrides HIP_HOST_COHERENT for specific allocation.*/ +#define hipHostMallocNonCoherent 0x80000000 + +/** Memory can be accessed by any stream on any device*/ +#define hipMemAttachGlobal 0x01 + +/** Memory cannot be accessed by any stream on any device.*/ +#define hipMemAttachHost 0x02 + +/** Memory can only be accessed by a single stream on the associated device.*/ +#define hipMemAttachSingle 0x04 + +#define hipDeviceMallocDefault 0x0 + +/** Memory is allocated in fine grained region of device.*/ +#define hipDeviceMallocFinegrained 0x1 + +/** Memory represents a HSA signal.*/ +#define hipMallocSignalMemory 0x2 + +/** Memory allocated will be uncached. */ +#define hipDeviceMallocUncached 0x3 + +/** Memory allocated will be contiguous. */ +#define hipDeviceMallocContiguous 0x4 + +// Flags that can be used with hipHostRegister. +/** Memory is Mapped and Portable.*/ +#define hipHostRegisterDefault 0x0 + +/** Memory is considered registered by all contexts.*/ +#define hipHostRegisterPortable 0x1 + +/** Map the allocation into the address space for the current device. The device pointer + * can be obtained with #hipHostGetDevicePointer.*/ +#define hipHostRegisterMapped 0x2 + +/** The passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. + * belonging to a third-party PCIe device, and it will be marked as non cache-coherent and + * contiguous. + * */ +#define hipHostRegisterIoMemory 0x4 + +/** This flag is ignored On AMD devices.*/ +#define hipHostRegisterReadOnly 0x08 + +/** Coarse Grained host memory lock.*/ +#define hipExtHostRegisterCoarseGrained 0x8 + +/** Map host memory onto extended fine grained access host memory pool when enabled. + * It is applicable on AMD devices, except for Navi4X, in Linux only. + */ +#define hipExtHostRegisterUncached 0x80000000 + +/** Automatically select between Spin and Yield.*/ +#define hipDeviceScheduleAuto 0x0 + +/** Dedicate a CPU core to spin-wait. 
Provides lowest latency, but burns a CPU core and may + * consume more power.*/ +#define hipDeviceScheduleSpin 0x1 + +/** Yield the CPU to the operating system when waiting. May increase latency, but lowers power + * and is friendlier to other threads in the system.*/ +#define hipDeviceScheduleYield 0x2 +#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x10 +/** Default HIP array allocation flag.*/ +#define hipArrayDefault 0x00 +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 +#define hipOccupancyDefault 0x00 +#define hipOccupancyDisableCachingOverride 0x01 +#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 +#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 +#define hipCpuDeviceId ((int)-1) +#define hipInvalidDeviceId ((int)-2) +// Flags that can be used with hipExtLaunch Set of APIs. +/** AnyOrderLaunch of kernels.*/ +#define hipExtAnyOrderLaunch 0x01 +// Flags to be used with hipStreamWaitValue32 and hipStreamWaitValue64. +#define hipStreamWaitValueGte 0x0 +#define hipStreamWaitValueEq 0x1 +#define hipStreamWaitValueAnd 0x2 +#define hipStreamWaitValueNor 0x3 + +/** Operations for hipStreamBatchMemOp*/ +typedef enum hipStreamBatchMemOpType { + hipStreamMemOpWaitValue32 = 0x1, + hipStreamMemOpWriteValue32 = 0x2, + hipStreamMemOpWaitValue64 = 0x4, + hipStreamMemOpWriteValue64 = 0x5, + hipStreamMemOpBarrier = 0x6, ///< Currently not supported + hipStreamMemOpFlushRemoteWrites = 0x3 ///< Currently not supported +} hipStreamBatchMemOpType; + +/** + * @brief Union representing batch memory operation parameters for HIP streams. + * + * hipStreamBatchMemOpParams is used to specify the parameters for batch memory + * operations in a HIP stream. This union supports various operations including + * waiting for a specific value, writing a value, and different flags for wait conditions. 
+ * + * @details + * The union includes fields for different types of operations defined in the + * enum hipStreamBatchMemOpType: + * - hipStreamMemOpWaitValue32: Wait for a 32-bit value. + * - hipStreamMemOpWriteValue32: Write a 32-bit value. + * - hipStreamMemOpWaitValue64: Wait for a 64-bit value. + * - hipStreamMemOpWriteValue64: Write a 64-bit value. + * + * Each operation type includes an address, the value to wait for or write, flags, and an + * optional alias that is not relevant on AMD GPUs. Flags can be used to specify different + * wait conditions such as equality, bitwise AND, greater than or equal, and bitwise NOR. + * + * Example usage: + * @code + * hipStreamBatchMemOpParams myArray[2]; + * myArray[0].operation = hipStreamMemOpWaitValue32; + * myArray[0].waitValue.address = waitAddr1; + * myArray[0].waitValue.value = 0x1; + * myArray[0].waitValue.flags = hipStreamWaitValueEq; + * + * myArray[1].operation = hipStreamMemOpWriteValue32; + * myArray[1].writeValue.address = writeAddr1; + * myArray[1].writeValue.value = 0x1; + * myArray[1].writeValue.flags = 0x0; + * + * result = hipStreamBatchMemOp(stream, 2, myArray, 0); + * @endcode + */ + +typedef union hipStreamBatchMemOpParams_union { + hipStreamBatchMemOpType operation; + struct hipStreamMemOpWaitValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend. Initial value is unimportant + } waitValue; + struct hipStreamMemOpWriteValueParams_t { + hipStreamBatchMemOpType operation; + hipDeviceptr_t address; + union { + uint32_t value; + uint64_t value64; + }; + unsigned int flags; + hipDeviceptr_t alias; ///< Not valid for AMD backend.
Initial value is unimportant + } writeValue; + struct hipStreamMemOpFlushRemoteWritesParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } flushRemoteWrites; ///< Currently not supported on AMD + struct hipStreamMemOpMemoryBarrierParams_t { + hipStreamBatchMemOpType operation; + unsigned int flags; + } memoryBarrier; ///< Currently not supported on AMD + uint64_t pad[6]; +} hipStreamBatchMemOpParams; + +/** + * @brief Structure representing node parameters for batch memory operations in HIP graphs. + * + * hipBatchMemOpNodeParams is used to specify the parameters for batch memory + * operations in HIP graphs. This struct includes the context to use for the operations, the + * number of operations, and an array of hipStreamBatchMemOpParams that describe the operations. + * + * @details + * The structure includes the following fields: + * - ctx: The HIP context to use for the operations. + * - count: The number of operations in the paramArray. + * - paramArray: A pointer to an array of hipStreamBatchMemOpParams. + * - flags: Flags to control the node. 
+ * + * Example usage: + * @code + * hipBatchMemOpNodeParams nodeParams; + * nodeParams.ctx = context; + * nodeParams.count = ARRAY_SIZE; + * nodeParams.paramArray = myArray; + * nodeParams.flags = 0; + * + * Pass nodeParams to the HIP graph APIs hipGraphAddBatchMemOpNode, hipGraphBatchMemOpNodeGetParams, + * hipGraphBatchMemOpNodeSetParams, hipGraphExecBatchMemOpNodeSetParams + * @endcode + */ + +typedef struct hipBatchMemOpNodeParams { + hipCtx_t ctx; + unsigned int count; + hipStreamBatchMemOpParams* paramArray; + unsigned int flags; +} hipBatchMemOpNodeParams; + +// Stream per thread +/** Implicit stream per application thread.*/ +#define hipStreamPerThread ((hipStream_t)2) + +#define hipStreamLegacy ((hipStream_t)1) + +// Indicates that the external memory object is a dedicated resource +#define hipExternalMemoryDedicated 0x1 +/** + * HIP Memory Advise values + * + * @note This memory advise enumeration is used on Linux, not Windows. + */ +typedef enum hipMemoryAdvise { + hipMemAdviseSetReadMostly = 1, ///< Data will mostly be read and only occasionally + ///< be written to + hipMemAdviseUnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly + hipMemAdviseSetPreferredLocation = 3, ///< Set the preferred location for the data as + ///< the specified device + hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data + hipMemAdviseSetAccessedBy = 5, ///< Data will be accessed by the specified device + ///< so prevent page faults as much as possible + hipMemAdviseUnsetAccessedBy = 6, ///< Let HIP decide on the page faulting policy + ///< for the specified device + hipMemAdviseSetCoarseGrain = 100, ///< The default memory model is fine-grain. That allows + ///< coherent operations between host and device, while + ///< executing kernels.
The coarse-grain can be used + ///< for data that only needs to be coherent at dispatch + ///< boundaries for better performance + hipMemAdviseUnsetCoarseGrain = 101 ///< Restores cache coherency policy back to fine-grain +} hipMemoryAdvise; +/** + * HIP Coherency Mode + */ +typedef enum hipMemRangeCoherencyMode { + hipMemRangeCoherencyModeFineGrain = 0, ///< Updates to memory with this attribute can be + ///< done coherently from all devices + hipMemRangeCoherencyModeCoarseGrain = 1, ///< Writes to memory with this attribute can be + ///< performed by a single device at a time + hipMemRangeCoherencyModeIndeterminate = 2 ///< Memory region queried contains subregions with + ///< both hipMemRangeCoherencyModeFineGrain and + ///< hipMemRangeCoherencyModeCoarseGrain attributes +} hipMemRangeCoherencyMode; +/** + * HIP range attributes + */ +typedef enum hipMemRangeAttribute { + hipMemRangeAttributeReadMostly = 1, ///< Whether the range will mostly be read and + ///< only occasionally be written to + hipMemRangeAttributePreferredLocation = 2, ///< The preferred location of the range + hipMemRangeAttributeAccessedBy = 3, ///< Memory range has hipMemAdviseSetAccessedBy + ///< set for the specified device + hipMemRangeAttributeLastPrefetchLocation = 4, ///< The last location to where the range was + ///< prefetched + hipMemRangeAttributeCoherencyMode = 100, ///< Returns coherency mode + ///< @ref hipMemRangeCoherencyMode for the range +} hipMemRangeAttribute; + +/** + * HIP memory pool attributes + */ +typedef enum hipMemPoolAttr { + /** + * (value type = int) + * Allow @p hipMemAllocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * hip events and null stream interactions can create the required + * stream ordered dependencies.
(default enabled) + */ + hipMemPoolReuseFollowEventDependencies = 0x1, + /** + * (value type = int) + * Allow reuse of already completed frees when there is no dependency + * between the free and allocation. (default enabled) + */ + hipMemPoolReuseAllowOpportunistic = 0x2, + /** + * (value type = int) + * Allow @p hipMemAllocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by hipFreeAsync (default enabled). + */ + hipMemPoolReuseAllowInternalDependencies = 0x3, + /** + * (value type = uint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + */ + hipMemPoolAttrReleaseThreshold = 0x4, + /** + * (value type = uint64_t) + * Amount of backing memory currently allocated for the mempool. + */ + hipMemPoolAttrReservedMemCurrent = 0x5, + /** + * (value type = uint64_t) + * High watermark of backing memory allocated for the mempool since the + * last time it was reset. High watermark can only be reset to zero. + */ + hipMemPoolAttrReservedMemHigh = 0x6, + /** + * (value type = uint64_t) + * Amount of memory from the pool that is currently in use by the application. + */ + hipMemPoolAttrUsedMemCurrent = 0x7, + /** + * (value type = uint64_t) + * High watermark of the amount of memory from the pool that was in use by the application since + * the last time it was reset. High watermark can only be reset to zero.
+ */ + hipMemPoolAttrUsedMemHigh = 0x8 +} hipMemPoolAttr; + +/** + * Specifies the memory protection flags for mapping + * + */ +typedef enum hipMemAccessFlags { + hipMemAccessFlagsProtNone = 0, ///< Default, make the address range not accessible + hipMemAccessFlagsProtRead = 1, ///< Set the address range read accessible + hipMemAccessFlagsProtReadWrite = 3 ///< Set the address range read-write accessible +} hipMemAccessFlags; +/** + * Memory access descriptor structure is used to specify memory access + * permissions for a virtual memory region in Virtual Memory Management API. + * This structure changes read, and write permissions for + * specific memory regions. + */ +typedef struct hipMemAccessDesc { + hipMemLocation location; ///< Location on which the accessibility has to change + hipMemAccessFlags flags; ///< Accessibility flags to set +} hipMemAccessDesc; +/** + * Defines the allocation types + */ +typedef enum hipMemAllocationType { + hipMemAllocationTypeInvalid = 0x0, + /** This allocation type is 'pinned', i.e. cannot migrate from its current + * location while the application is actively using it + */ + hipMemAllocationTypePinned = 0x1, + hipMemAllocationTypeManaged = 0x2, + hipMemAllocationTypeUncached = 0x40000000, + hipMemAllocationTypeMax = 0x7FFFFFFF +} hipMemAllocationType; +/** + * Flags for specifying handle types for memory pool allocations + * + */ +typedef enum hipMemAllocationHandleType { + hipMemHandleTypeNone = 0x0, ///< Does not allow any export mechanism + hipMemHandleTypePosixFileDescriptor = + 0x1, ///< Allows a file descriptor for exporting. Permitted only on POSIX systems + hipMemHandleTypeWin32 = 0x2, ///< Allows a Win32 NT handle for exporting. (HANDLE) + hipMemHandleTypeWin32Kmt = 0x4 ///< Allows a Win32 KMT handle for exporting. (D3DKMT_HANDLE) +} hipMemAllocationHandleType; +/** + * Specifies the properties of allocations made from the pool. 
+ */ +typedef struct hipMemPoolProps { + hipMemAllocationType + allocType; ///< Allocation type. Currently must be specified as @p hipMemAllocationTypePinned + hipMemAllocationHandleType + handleTypes; ///< Handle types that will be supported by allocations from the pool + hipMemLocation location; ///< Location where allocations should reside + /** + * Windows-specific LPSECURITYATTRIBUTES required when @p hipMemHandleTypeWin32 is specified + */ + void* win32SecurityAttributes; + size_t maxSize; ///< Maximum pool size. When set to 0, defaults to a system dependent value + unsigned char reserved[56]; ///< Reserved for future use, must be 0 +} hipMemPoolProps; +/** + * Opaque data structure for exporting a pool allocation + */ +typedef struct hipMemPoolPtrExportData { + unsigned char reserved[64]; +} hipMemPoolPtrExportData; + +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncAttribute { + hipFuncAttributeMaxDynamicSharedMemorySize = + 8, ///< The maximum number of bytes requested for dynamically allocated shared memory + hipFuncAttributePreferredSharedMemoryCarveout = + 9, ///< Sets the percentage of total shared memory allocated as the shared memory carveout + hipFuncAttributeMax +} hipFuncAttribute; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncCache_t { + hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) + hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache + hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory + hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory +} hipFuncCache_t; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipSharedMemConfig { + hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. 
+ hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-byte intervals and performs best + ///< when adjacent threads access data 4 bytes apart. + hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best + ///< when adjacent threads access data 8 bytes apart. +} hipSharedMemConfig; +/** + * Struct for data in 3D + */ +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + constexpr __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) + : x(_x), y(_y), z(_z) {}; +#endif +} dim3; +/** + * struct hipLaunchParams_t + */ +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + void** args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; +/** + * struct hipFunctionLaunchParams_t + */ +typedef struct hipFunctionLaunchParams_t { + hipFunction_t function; ///< Kernel to launch + unsigned int gridDimX; ///< Width(X) of grid in blocks + unsigned int gridDimY; ///< Height(Y) of grid in blocks + unsigned int gridDimZ; ///< Depth(Z) of grid in blocks + unsigned int blockDimX; ///< X dimension of each thread block + unsigned int blockDimY; ///< Y dimension of each thread block + unsigned int blockDimZ; ///< Z dimension of each thread block + unsigned int sharedMemBytes; ///< Shared memory + hipStream_t hStream; ///< Stream identifier + void** kernelParams; ///< Kernel parameters +} hipFunctionLaunchParams; +typedef enum hipExternalMemoryHandleType_enum { + hipExternalMemoryHandleTypeOpaqueFd = 1, + hipExternalMemoryHandleTypeOpaqueWin32 = 2, + hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3, + hipExternalMemoryHandleTypeD3D12Heap = 4, + hipExternalMemoryHandleTypeD3D12Resource = 5, + hipExternalMemoryHandleTypeD3D11Resource = 6, + hipExternalMemoryHandleTypeD3D11ResourceKmt = 7, +
hipExternalMemoryHandleTypeNvSciBuf = 8 +} hipExternalMemoryHandleType; +typedef struct hipExternalMemoryHandleDesc_st { + hipExternalMemoryHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* nvSciBufObject; + } handle; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryHandleDesc; +typedef struct hipExternalMemoryBufferDesc_st { + unsigned long long offset; + unsigned long long size; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalMemoryBufferDesc; +typedef struct hipExternalMemoryMipmappedArrayDesc_st { + unsigned long long offset; + hipChannelFormatDesc formatDesc; + hipExtent extent; + unsigned int flags; + unsigned int numLevels; +} hipExternalMemoryMipmappedArrayDesc; +typedef void* hipExternalMemory_t; +typedef enum hipExternalSemaphoreHandleType_enum { + hipExternalSemaphoreHandleTypeOpaqueFd = 1, + hipExternalSemaphoreHandleTypeOpaqueWin32 = 2, + hipExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3, + hipExternalSemaphoreHandleTypeD3D12Fence = 4, + hipExternalSemaphoreHandleTypeD3D11Fence = 5, + hipExternalSemaphoreHandleTypeNvSciSync = 6, + hipExternalSemaphoreHandleTypeKeyedMutex = 7, + hipExternalSemaphoreHandleTypeKeyedMutexKmt = 8, + hipExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9, + hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10 +} hipExternalSemaphoreHandleType; +typedef struct hipExternalSemaphoreHandleDesc_st { + hipExternalSemaphoreHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + const void* NvSciSyncObj; + } handle; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreHandleDesc; +typedef void* hipExternalSemaphore_t; +typedef struct hipExternalSemaphoreSignalParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + } 
keyedMutex; + unsigned int reserved[12]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreSignalParams; +/** + * External semaphore wait parameters, compatible with driver type + */ +typedef struct hipExternalSemaphoreWaitParams_st { + struct { + struct { + unsigned long long value; + } fence; + union { + void* fence; + unsigned long long reserved; + } nvSciSync; + struct { + unsigned long long key; + unsigned int timeoutMs; + } keyedMutex; + unsigned int reserved[10]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreWaitParams; + +#if __HIP_HAS_GET_PCH +/** + * Internal use only. This API may change in the future + * Pre-Compiled header for online compilation + */ +void __hipGetPCH(const char** pch, unsigned int* size); +#endif + +/** + * HIP Access flags for Interop resources. + */ +typedef enum hipGraphicsRegisterFlags { + hipGraphicsRegisterFlagsNone = 0, + hipGraphicsRegisterFlagsReadOnly = 1, ///< HIP will not write to this registered resource, read only + hipGraphicsRegisterFlagsWriteDiscard = + 2, ///< HIP will only write and will not read from this registered resource, write only + hipGraphicsRegisterFlagsSurfaceLoadStore = 4, ///< HIP will bind this resource to a surface, read and write + hipGraphicsRegisterFlagsTextureGather = + 8 ///< HIP will perform texture gather operations on this registered resource, read and write or read only +} hipGraphicsRegisterFlags; + +typedef struct _hipGraphicsResource hipGraphicsResource; + +typedef hipGraphicsResource* hipGraphicsResource_t; + +/** + * An opaque value that represents a hip graph + */ +typedef struct ihipGraph* hipGraph_t; +/** + * An opaque value that represents a hip graph node + */ +typedef struct hipGraphNode* hipGraphNode_t; +/** + * An opaque value that represents a hip graph Exec + */ +typedef struct hipGraphExec* hipGraphExec_t; + +/** + * An opaque value that represents a user obj + */ +typedef struct hipUserObject*
hipUserObject_t; + + +/** + * hipGraphNodeType + */ +typedef enum hipGraphNodeType { + hipGraphNodeTypeKernel = 0, ///< GPU kernel node + hipGraphNodeTypeMemcpy = 1, ///< Memcpy node + hipGraphNodeTypeMemset = 2, ///< Memset node + hipGraphNodeTypeHost = 3, ///< Host (executable) node + hipGraphNodeTypeGraph = 4, ///< Node which executes an embedded graph + hipGraphNodeTypeEmpty = 5, ///< Empty (no-op) node + hipGraphNodeTypeWaitEvent = 6, ///< External event wait node + hipGraphNodeTypeEventRecord = 7, ///< External event record node + hipGraphNodeTypeExtSemaphoreSignal = 8, ///< External Semaphore signal node + hipGraphNodeTypeExtSemaphoreWait = 9, ///< External Semaphore wait node + hipGraphNodeTypeMemAlloc = 10, ///< Memory alloc node + hipGraphNodeTypeMemFree = 11, ///< Memory free node + hipGraphNodeTypeMemcpyFromSymbol = 12, ///< MemcpyFromSymbol node + hipGraphNodeTypeMemcpyToSymbol = 13, ///< MemcpyToSymbol node + hipGraphNodeTypeBatchMemOp = 14, ///< BatchMemOp node + hipGraphNodeTypeCount +} hipGraphNodeType; + +typedef void (*hipHostFn_t)(void* userData); +typedef struct hipHostNodeParams { + hipHostFn_t fn; + void* userData; +} hipHostNodeParams; +typedef struct hipKernelNodeParams { + dim3 blockDim; + void** extra; + void* func; + dim3 gridDim; + void** kernelParams; + unsigned int sharedMemBytes; +} hipKernelNodeParams; +typedef struct hipMemsetParams { + void* dst; + unsigned int elementSize; + size_t height; + size_t pitch; + unsigned int value; + size_t width; +} hipMemsetParams; + +typedef struct hipMemAllocNodeParams { + hipMemPoolProps poolProps; ///< Pool properties, which contain where + ///< the location should reside + const hipMemAccessDesc* accessDescs; ///< Pointer to the memory access descriptors. + size_t accessDescCount; ///< The number of access descriptors. 
+ ///< Must not be bigger than the number of GPUs + size_t bytesize; ///< The size of the requested allocation in bytes + void* dptr; ///< Returned device address of the allocation +} hipMemAllocNodeParams; + +/** + * Specifies performance hint with hipAccessPolicyWindow + */ +typedef enum hipAccessProperty { + hipAccessPropertyNormal = 0, ///< Normal cache persistence. + hipAccessPropertyStreaming = 1, ///< Streaming access is less likely to persist from cache + hipAccessPropertyPersisting = 2, ///< Persisting access is more likely to persist in cache +} hipAccessProperty; + +/*** + * Specifies access policy for a window, a contiguous extent of memory + * beginning at base_ptr and ending at base_ptr + num_bytes. + */ +typedef struct hipAccessPolicyWindow { + void* base_ptr; ///< Starting address of the access policy window + hipAccessProperty hitProp; ///< hipAccessProperty set for hit + float hitRatio; ///< hitRatio specifies percentage of lines assigned hitProp + hipAccessProperty missProp; ///< hipAccessProperty set for miss + size_t num_bytes; ///< Size in bytes of the window policy. +} hipAccessPolicyWindow; + +/** + * Memory Synchronization Domain map + */ +typedef struct hipLaunchMemSyncDomainMap { + unsigned char default_; /**< The default domain ID to use for designated kernels */ + unsigned char remote; /**< The remote domain ID to use for designated kernels */ +} hipLaunchMemSyncDomainMap; + +/** + * Memory Synchronization Domain + */ +typedef enum hipLaunchMemSyncDomain { + hipLaunchMemSyncDomainDefault = 0, /**< Launch kernels in the default domain */ + hipLaunchMemSyncDomainRemote = 1 /**< Launch kernels in the remote domain */ +} hipLaunchMemSyncDomain; + +/** + * Stream Synchronization Policy. + * Can be set with hipStreamSetAttribute + */ +typedef enum hipSynchronizationPolicy { + hipSyncPolicyAuto = 1, /**< Default Synchronization Policy. 
Host thread waits actively */ + hipSyncPolicySpin = 2, /**< Host thread spins in tight loop waiting for completion */ + hipSyncPolicyYield = 3, /**< Host spins but yields to other threads, reducing CPU usage */ + hipSyncPolicyBlockingSync = 4 /**< Host thread blocks (sleeps) until the stream completes */ +} hipSynchronizationPolicy; + +/** + * Launch Attribute ID + */ +typedef enum hipLaunchAttributeID { + hipLaunchAttributeAccessPolicyWindow = 1, ///< Valid for Streams, graph nodes, launches + hipLaunchAttributeCooperative = 2, ///< Valid for graph nodes, launches + hipLaunchAttributeSynchronizationPolicy = 3, ///< Valid for streams + hipLaunchAttributePriority = 8, ///< Valid for graph node, streams, launches + hipLaunchAttributeMemSyncDomainMap = 9, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMemSyncDomain = 10, ///< Valid for streams, graph nodes, launches + hipLaunchAttributeMax +} hipLaunchAttributeID; + + +/** + * Launch Attribute Value + */ +typedef union hipLaunchAttributeValue { + char pad[64]; ///< 64 byte padding + hipAccessPolicyWindow + accessPolicyWindow; ///< Value of launch attribute ::hipLaunchAttributeAccessPolicyWindow. + int cooperative; ///< Value of launch attribute ::hipLaunchAttributeCooperative. Indicates + ///< whether the kernel is cooperative. + int priority; ///< Value of launch attribute ::hipLaunchAttributePriority. Execution priority of + ///< kernel + hipSynchronizationPolicy + syncPolicy; ///< Value of launch attribute ::hipLaunchAttributeSynchronizationPolicy. 
Used + ///< to work queued up in stream + hipLaunchMemSyncDomainMap + memSyncDomainMap; ///< Value of launch attribute hipLaunchAttributeMemSyncDomainMap + hipLaunchMemSyncDomain + memSyncDomain; ///< Value of launch attribute hipLaunchAttributeMemSyncDomain +} hipLaunchAttributeValue; + +/** + * Stream attributes + */ +#define hipStreamAttrID hipLaunchAttributeID +#define hipStreamAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipStreamAttributeSynchronizationPolicy hipLaunchAttributeSynchronizationPolicy +#define hipStreamAttributeMemSyncDomainMap hipLaunchAttributeMemSyncDomainMap +#define hipStreamAttributeMemSyncDomain hipLaunchAttributeMemSyncDomain +#define hipStreamAttributePriority hipLaunchAttributePriority + +#define hipStreamAttrValue hipLaunchAttributeValue + +/** + * Kernel node attributeID + */ +#define hipKernelNodeAttrID hipLaunchAttributeID +#define hipKernelNodeAttributeAccessPolicyWindow hipLaunchAttributeAccessPolicyWindow +#define hipKernelNodeAttributeCooperative hipLaunchAttributeCooperative +#define hipKernelNodeAttributePriority hipLaunchAttributePriority + +/** + * Kernel node attribute value + */ +#define hipKernelNodeAttrValue hipLaunchAttributeValue + +/** + * hip Drv attributes + */ +#define hipDrvLaunchAttributeCooperative hipLaunchAttributeCooperative + +#define hipDrvLaunchAttributeID hipLaunchAttributeID +#define hipDrvLaunchAttributeValue hipLaunchAttributeValue +#define hipDrvLaunchAttribute hipLaunchAttribute + +/** + * Graph execution update result + */ +typedef enum hipGraphExecUpdateResult { + hipGraphExecUpdateSuccess = 0x0, ///< The update succeeded + hipGraphExecUpdateError = 0x1, ///< The update failed for an unexpected reason which is described + ///< in the return value of the function + hipGraphExecUpdateErrorTopologyChanged = 0x2, ///< The update failed because the topology changed + hipGraphExecUpdateErrorNodeTypeChanged = 0x3, ///< The update failed because a node type changed + 
hipGraphExecUpdateErrorFunctionChanged = + 0x4, ///< The update failed because the function of a kernel node changed + hipGraphExecUpdateErrorParametersChanged = + 0x5, ///< The update failed because the parameters changed in a way that is not supported + hipGraphExecUpdateErrorNotSupported = + 0x6, ///< The update failed because something about the node is not supported + hipGraphExecUpdateErrorUnsupportedFunctionChange = 0x7 +} hipGraphExecUpdateResult; + +typedef enum hipStreamCaptureMode { + hipStreamCaptureModeGlobal = 0, + hipStreamCaptureModeThreadLocal, + hipStreamCaptureModeRelaxed +} hipStreamCaptureMode; +typedef enum hipStreamCaptureStatus { + hipStreamCaptureStatusNone = 0, ///< Stream is not capturing + hipStreamCaptureStatusActive, ///< Stream is actively capturing + hipStreamCaptureStatusInvalidated ///< Stream is part of a capture sequence that has been + ///< invalidated, but not terminated +} hipStreamCaptureStatus; + +typedef enum hipStreamUpdateCaptureDependenciesFlags { + hipStreamAddCaptureDependencies = 0, ///< Add new nodes to the dependency set + hipStreamSetCaptureDependencies, ///< Replace the dependency set with the new nodes +} hipStreamUpdateCaptureDependenciesFlags; + +typedef enum hipGraphMemAttributeType { + hipGraphMemAttrUsedMemCurrent = + 0, ///< Amount of memory, in bytes, currently associated with graphs + hipGraphMemAttrUsedMemHigh, ///< High watermark of memory, in bytes, associated with graphs since + ///< the last time. + hipGraphMemAttrReservedMemCurrent, ///< Amount of memory, in bytes, currently allocated for + ///< graphs. + hipGraphMemAttrReservedMemHigh, ///< High watermark of memory, in bytes, currently allocated for + ///< graphs +} hipGraphMemAttributeType; +typedef enum hipUserObjectFlags { + hipUserObjectNoDestructorSync = 0x1, ///< Destructor execution is not synchronized. +} hipUserObjectFlags; + +typedef enum hipUserObjectRetainFlags { + hipGraphUserObjectMove = 0x1, ///< Add new reference or retain. 
+} hipUserObjectRetainFlags; + +typedef enum hipGraphInstantiateFlags { + hipGraphInstantiateFlagAutoFreeOnLaunch = + 1, ///< Automatically free memory allocated in a graph before relaunching. + hipGraphInstantiateFlagUpload = 2, ///< Automatically upload the graph after instantiation. + hipGraphInstantiateFlagDeviceLaunch = + 4, ///< Instantiate the graph to be launched from the device. + hipGraphInstantiateFlagUseNodePriority = + 8, ///< Run the graph using the per-node priority attributes rather than the priority of the + ///< stream it is launched into. +} hipGraphInstantiateFlags; + +enum hipGraphDebugDotFlags { + hipGraphDebugDotFlagsVerbose = + 1 << 0, /**< Output all debug data as if every debug flag is enabled */ + hipGraphDebugDotFlagsKernelNodeParams = 1 << 2, /**< Adds hipKernelNodeParams to output */ + hipGraphDebugDotFlagsMemcpyNodeParams = 1 << 3, /**< Adds hipMemcpy3DParms to output */ + hipGraphDebugDotFlagsMemsetNodeParams = 1 << 4, /**< Adds hipMemsetParams to output */ + hipGraphDebugDotFlagsHostNodeParams = 1 << 5, /**< Adds hipHostNodeParams to output */ + hipGraphDebugDotFlagsEventNodeParams = + 1 << 6, /**< Adds hipEvent_t handle from record and wait nodes to output */ + hipGraphDebugDotFlagsExtSemasSignalNodeParams = + 1 << 7, /**< Adds hipExternalSemaphoreSignalNodeParams values to output */ + hipGraphDebugDotFlagsExtSemasWaitNodeParams = + 1 << 8, /**< Adds hipExternalSemaphoreWaitNodeParams to output */ + hipGraphDebugDotFlagsKernelNodeAttributes = + 1 << 9, /**< Adds hipKernelNodeAttrID values to output */ + hipGraphDebugDotFlagsHandles = + 1 << 10 /**< Adds node handles and every kernel function handle to output */ +}; + +/** + * hipGraphInstantiateWithParams results + */ +typedef enum hipGraphInstantiateResult { + hipGraphInstantiateSuccess = 0, /**< Instantiation Success */ + hipGraphInstantiateError = 1, /**< Instantiation failed for an + unexpected reason which is described in the return value of the function */ + 
hipGraphInstantiateInvalidStructure = 2, /**< Instantiation failed due + to invalid structure, such as cycles */ + hipGraphInstantiateNodeOperationNotSupported = 3, /**< Instantiation for device launch failed + because the graph contained an unsupported operation */ + hipGraphInstantiateMultipleDevicesNotSupported = 4, /**< Instantiation for device launch failed + due to the nodes belonging to different contexts */ +} hipGraphInstantiateResult; + +/** + * Graph Instantiation parameters + */ +typedef struct hipGraphInstantiateParams { + hipGraphNode_t errNode_out; /**< The node which caused instantiation to fail, if any*/ + unsigned long long flags; /**< Instantiation flags */ + hipGraphInstantiateResult result_out; /**< Whether instantiation was successful. + If it failed, the reason why */ + hipStream_t uploadStream; /**< Upload stream */ +} hipGraphInstantiateParams; + + +/** + * Memory allocation properties + */ +typedef struct hipMemAllocationProp { + hipMemAllocationType type; ///< Memory allocation type + union { + hipMemAllocationHandleType requestedHandleType; ///< Requested handle type + hipMemAllocationHandleType requestedHandleTypes; ///< Requested handle types + }; + hipMemLocation location; ///< Memory location + void* win32HandleMetaData; ///< Metadata for Win32 handles + struct { + unsigned char compressionType; ///< Compression type + unsigned char gpuDirectRDMACapable; ///< RDMA capable + unsigned short usage; ///< Usage + } allocFlags; +} hipMemAllocationProp; + +/** + * External semaphore signal node parameters + */ +typedef struct hipExternalSemaphoreSignalNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external signal semaphore. + const hipExternalSemaphoreSignalParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. 
+ unsigned int numExtSems; +} hipExternalSemaphoreSignalNodeParams; + +/** + * External semaphore wait node parameters + */ +typedef struct hipExternalSemaphoreWaitNodeParams { + ///< Array containing external semaphore handles. + hipExternalSemaphore_t* extSemArray; + ///< Array containing parameters of external wait semaphore. + const hipExternalSemaphoreWaitParams* paramsArray; + ///< Total number of handles and parameters contained in extSemArray and paramsArray. + unsigned int numExtSems; +} hipExternalSemaphoreWaitNodeParams; + +/** + * Generic handle for memory allocation + */ +typedef struct ihipMemGenericAllocationHandle* hipMemGenericAllocationHandle_t; + +/** + * Flags for granularity + */ +typedef enum hipMemAllocationGranularity_flags { + hipMemAllocationGranularityMinimum = 0x0, ///< Minimum granularity + hipMemAllocationGranularityRecommended = 0x1 ///< Recommended granularity for performance +} hipMemAllocationGranularity_flags; + +/** + * Memory handle type + */ +typedef enum hipMemHandleType { + hipMemHandleTypeGeneric = 0x0 ///< Generic handle type +} hipMemHandleType; + +/** + * Memory operation types + */ +typedef enum hipMemOperationType { + hipMemOperationTypeMap = 0x1, ///< Map operation + hipMemOperationTypeUnmap = 0x2 ///< Unmap operation +} hipMemOperationType; + +/** + * Subresource types for sparse arrays + */ +typedef enum hipArraySparseSubresourceType { + hipArraySparseSubresourceTypeSparseLevel = 0x0, ///< Sparse level + hipArraySparseSubresourceTypeMiptail = 0x1 ///< Miptail +} hipArraySparseSubresourceType; + +/** + * Map info for arrays + */ +typedef struct hipArrayMapInfo { + hipResourceType resourceType; ///< Resource type + union { + hipMipmappedArray mipmap; + hipArray_t array; + } resource; + hipArraySparseSubresourceType subresourceType; ///< Sparse subresource type + union { + struct { + unsigned int + level; ///< For mipmapped arrays must be a valid mipmap level. 
For arrays must be zero + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned int offsetX; ///< X offset in elements + unsigned int offsetY; ///< Y offset in elements + unsigned int offsetZ; ///< Z offset in elements + unsigned int extentWidth; ///< Width in elements + unsigned int extentHeight; ///< Height in elements + unsigned int extentDepth; ///< Depth in elements + } sparseLevel; + struct { + unsigned int + layer; ///< For layered arrays must be a valid layer index. Otherwise, must be zero + unsigned long long offset; ///< Offset within mip tail + unsigned long long size; ///< Extent in bytes + } miptail; + } subresource; + hipMemOperationType memOperationType; ///< Memory operation type + hipMemHandleType memHandleType; ///< Memory handle type + union { + hipMemGenericAllocationHandle_t memHandle; + } memHandle; + unsigned long long offset; ///< Offset within the memory + unsigned int deviceBitMask; ///< Device ordinal bit mask + unsigned int flags; ///< flags for future use, must be zero now. + unsigned int reserved[2]; ///< Reserved for future use, must be zero now. +} hipArrayMapInfo; + +/** + * Memcpy node params + */ +typedef struct hipMemcpyNodeParams { + int flags; ///< Must be zero. + int reserved[3]; ///< Must be zero. + hipMemcpy3DParms copyParams; ///< Params set for the memory copy. 
+} hipMemcpyNodeParams; + +/** + * Child graph node params + */ +typedef struct hipChildGraphNodeParams { + hipGraph_t graph; ///< Either the child graph to clone into the node, or + ///< a handle to the graph possessed by the node used during query +} hipChildGraphNodeParams; + +/** + * Event wait node params + */ +typedef struct hipEventWaitNodeParams { + hipEvent_t event; ///< Event to wait on +} hipEventWaitNodeParams; + +/** + * Event record node params + */ +typedef struct hipEventRecordNodeParams { + hipEvent_t event; ///< The event to be recorded when node executes +} hipEventRecordNodeParams; + +/** + * Memory free node params + */ +typedef struct hipMemFreeNodeParams { + void* dptr; ///< the pointer to be freed +} hipMemFreeNodeParams; + +/** + * Params for different graph nodes + */ +typedef struct hipGraphNodeParams { + hipGraphNodeType type; + int reserved0[3]; + union { + long long reserved1[29]; + hipKernelNodeParams kernel; + hipMemcpyNodeParams memcpy; + hipMemsetParams memset; + hipHostNodeParams host; + hipChildGraphNodeParams graph; + hipEventWaitNodeParams eventWait; + hipEventRecordNodeParams eventRecord; + hipExternalSemaphoreSignalNodeParams extSemSignal; + hipExternalSemaphoreWaitNodeParams extSemWait; + hipMemAllocNodeParams alloc; + hipMemFreeNodeParams free; + }; + + long long reserved2; +} hipGraphNodeParams; + +/** + * This port activates when the kernel has finished executing. + */ +#define hipGraphKernelNodePortDefault 0 + +/** + * This port activates when all blocks of the kernel have begun execution. + */ +#define hipGraphKernelNodePortLaunchCompletion 2 + +/** + * This port activates when all blocks of the kernel have performed + * hipTriggerProgrammaticLaunchCompletion() or have terminated. + * It must be used with edge type hipGraphDependencyTypeProgrammatic. 
+ */ +#define hipGraphKernelNodePortProgrammatic 1 + +typedef enum hipGraphDependencyType { + hipGraphDependencyTypeDefault = 0, + hipGraphDependencyTypeProgrammatic = 1 +} hipGraphDependencyType; + +typedef struct hipGraphEdgeData { + unsigned char + from_port; ///< This indicates when the dependency is triggered from the upstream node on the + ///< edge. The meaning is specific to the node type. A value of 0 in all cases + ///< means full completion of the upstream node, with memory visibility to the + ///< downstream node or portion thereof (indicated by to_port). Only kernel nodes + ///< define non-zero ports. A kernel node can use the following output port types: + ///< hipGraphKernelNodePortDefault, hipGraphKernelNodePortProgrammatic, or + ///< hipGraphKernelNodePortLaunchCompletion. + unsigned char reserved[5]; ///< These bytes are unused and must be zeroed + unsigned char + to_port; ///< Currently no node types define non-zero ports. This field must be set to zero. + unsigned char type; ///< This should be populated with a value from hipGraphDependencyType +} hipGraphEdgeData; + + +/** + * Used to specify custom attributes for launching kernels + */ +typedef struct hipLaunchAttribute_st { + hipLaunchAttributeID id; ///< Identifier of the launch attribute + char pad[8 - sizeof(hipLaunchAttributeID)]; ///< Padding to align the structure to 8 bytes + union { + hipLaunchAttributeValue val; ///< Value associated with the launch attribute + hipLaunchAttributeValue value; ///< Value associated with the launch attribute + }; +} hipLaunchAttribute; + +/** + * HIP extensible launch configuration + */ +typedef struct hipLaunchConfig_st { + dim3 gridDim; ///< Grid dimensions + dim3 blockDim; ///< Block dimensions + size_t dynamicSmemBytes; ///< Dynamic shared-memory size per thread block + hipStream_t stream; ///< Stream identifier + hipLaunchAttribute* attrs; ///< Attributes list + unsigned int numAttrs; ///< Number of attributes +} hipLaunchConfig_t; + +/** + * HIP 
driver extensible launch configuration + */ +typedef struct HIP_LAUNCH_CONFIG_st { + unsigned int gridDimX; ///< Grid width in blocks + unsigned int gridDimY; ///< Grid height in blocks + unsigned int gridDimZ; ///< Grid depth in blocks + unsigned int blockDimX; ///< Thread block dimension in X + unsigned int blockDimY; ///< Thread block dimension in Y + unsigned int blockDimZ; ///< Thread block dimension in Z + unsigned int sharedMemBytes; ///< Dynamic shared-memory size in bytes per block + hipStream_t hStream; ///< HIP stream identifier + hipLaunchAttribute* attrs; ///< Attribute list + unsigned int numAttrs; ///< Number of attributes +} HIP_LAUNCH_CONFIG; + +/** + * Requested handle type for address range. + */ +typedef enum hipMemRangeHandleType { + hipMemRangeHandleTypeDmaBufFd = 0x1, + hipMemRangeHandleTypeMax = 0x7fffffff +} hipMemRangeHandleType; + +/** + * Mem Range Flags used in hipMemGetHandleForAddressRange. + */ +typedef enum hipMemRangeFlags { + hipMemRangeFlagDmaBufMappingTypePcie = 0x1, + hipMemRangeFlagsMax = 0x7fffffff +} hipMemRangeFlags; + +// Doxygen end group GlobalDefs +/** + * @} + */ +/** + * @defgroup API HIP API + * @{ + * + * Defines the HIP API. See the individual sections for more information. + */ +/** + * @defgroup Driver Initialization and Version + * @{ + * This section describes the initialization and version functions of HIP runtime API. + * + */ +/** + * @brief Explicitly initializes the HIP runtime. + * + * @param [in] flags Initialization flag, should be zero. + * + * Most HIP APIs implicitly initialize the HIP runtime. + * This API provides control over the timing of the initialization. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO-ctx - more description on error codes. +hipError_t hipInit(unsigned int flags); + +/** + * @brief Returns the approximate HIP driver version. 
+ * + * @param [out] driverVersion driver version + * + * HIP driver version shows up in the format: + * HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The HIP driver version does not correspond to an exact CUDA driver revision. + * On AMD platform, the API returns the HIP driver version, while on NVIDIA platform, it calls + * the corresponding CUDA runtime API and returns the CUDA driver version. + * There is no mapping/correlation between HIP driver version and CUDA driver version. + * + * @see hipRuntimeGetVersion + */ +hipError_t hipDriverGetVersion(int* driverVersion); +/** + * @brief Returns the approximate HIP Runtime version. + * + * @param [out] runtimeVersion HIP runtime version + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning The version definition of HIP runtime is different from CUDA. + * On AMD platform, the function returns HIP runtime version, + * while on NVIDIA platform, it returns CUDA runtime version. + * And there is no mapping/correlation between HIP version and CUDA version. + * + * @see hipDriverGetVersion + */ +hipError_t hipRuntimeGetVersion(int* runtimeVersion); +/** + * @brief Returns a handle to a compute device + * @param [out] device Handle of device + * @param [in] ordinal Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); + +/** + * @brief Returns the compute capability of the device + * @param [out] major Major compute capability version number + * @param [out] minor Minor compute capability version number + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); +/** + * @brief Returns an identifier string for the device. 
+ * @param [out] name String of the device name + * @param [in] len Maximum length of string to store in device name + * @param [in] device Device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); +/** + * @brief Returns a UUID for the device. [BETA] + * @param [out] uuid UUID for the device + * @param [in] device device ordinal + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorDeinitialized + */ +hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device); +/** + * @brief Returns a value for attribute of link between two devices + * @param [out] value Pointer of the value for the attribute + * @param [in] attr enum of hipDeviceP2PAttr to query + * @param [in] srcDevice The source device of the link + * @param [in] dstDevice The destination device of the link + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, int srcDevice, + int dstDevice); +/** + * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. + * @param [out] pciBusId The string of PCI Bus Id format for the device + * @param [in] len Maximum length of string + * @param [in] device The device ordinal + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); +/** + * @brief Returns a handle to a compute device. + * @param [out] device The handle of the device + * @param [in] pciBusId The string of PCI Bus Id for the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); +/** + * @brief Returns the total amount of memory on the device. 
+ * @param [out] bytes The size of memory in bytes, on the device + * @param [in] device The ordinal of the device + * + * @returns #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); +// doxygen end initialization +/** + * @} + */ +/** + * @defgroup Device Device Management + * @{ + * This section describes the device management functions of HIP runtime API. + */ +/** + * @brief Waits on all active streams on current device + * + * When this command is invoked, the host thread gets blocked until all the commands associated + * with streams associated with the device have completed. HIP does not support multiple blocking modes (yet!). + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipDeviceReset + */ +hipError_t hipDeviceSynchronize(void); +/** + * @brief The state of current device is discarded and updated to a fresh state. + * + * Calling this function deletes all streams created, memory allocated, kernels running, events + * created. Make sure that no other thread is using the device or streams, memory, kernels, events + * associated with the current device. + * + * @returns #hipSuccess + * + * @see hipDeviceSynchronize + */ +hipError_t hipDeviceReset(void); +/** + * @brief Set default device to be used for subsequent hip API calls from this thread. + * + * @param[in] deviceId Valid device in range 0...(hipGetDeviceCount()-1). + * + * Sets @p deviceId as the default device for the calling host thread. Valid device id's are 0... + * (hipGetDeviceCount()-1). + * + * Many HIP APIs implicitly use the "default device" : + * + * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be + * allocated on device. + * - Any streams or events created from this host thread will be associated with device. 
+ * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device + * (unless a specific stream is specified, in which case the device associated with that stream will + * be used). + * + * This function may be called from any host thread. Multiple host threads may use the same device. + * This function does no synchronization with the previous or new device, and has very little + * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before + * making a HIP runtime call which uses the default device. + * + * The default device is stored in thread-local-storage for each thread. + * Thread-pool implementations may inherit the default device of the previous thread. A good + * practice is to always call hipSetDevice at the start of HIP coding sequence to establish a known + * standard device. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorNoDevice + * + * @see #hipGetDevice, #hipGetDeviceCount + */ +hipError_t hipSetDevice(int deviceId); +/** + * @brief Set a list of devices that can be used. + * + * @param[in] device_arr List of devices to try + * @param[in] len Number of devices in specified list + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see #hipGetDevice, #hipGetDeviceCount, #hipSetDevice, #hipGetDeviceProperties, + * #hipSetDeviceFlags, #hipChooseDevice + * + * */ +hipError_t hipSetValidDevices(int* device_arr, int len); +/** + * @brief Return the default device id for the calling host thread. + * + * @param [out] deviceId *deviceId is written with the default device + * + * HIP maintains a default device for each thread using thread-local-storage. + * This device is used implicitly for HIP runtime APIs called by this thread. + * hipGetDevice returns in * @p deviceId the default device for the calling host thread. 
+ * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipSetDevice, hipGetDevicesizeBytes + */ +hipError_t hipGetDevice(int* deviceId); +/** + * @brief Return number of compute-capable devices. + * + * @param [out] count Returns number of compute-capable devices. + * + * @returns #hipSuccess, #hipErrorNoDevice + * + * + * Returns in @p *count the number of devices that have ability to run compute commands. If there + * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more + * devices can be found, then hipGetDeviceCount returns #hipSuccess. + */ +hipError_t hipGetDeviceCount(int* count); +/** + * @brief Query for a specific device attribute. + * + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); +/** + * @brief Returns the default memory pool of the specified device + * + * @param [out] mem_pool Default memory pool to return + * @param [in] device Device index for query the default memory pool + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Sets the current memory pool of a device + * + * The memory pool must be local to the specified device. + * @p hipMallocAsync allocates from the current mempool of the provided stream's device. 
+ * By default, a device's current memory pool is its default memory pool. + * + * @note Use @p hipMallocFromPoolAsync for asynchronous memory allocations from a device + * different than the one the stream runs on. + * + * @param [in] device Device index for the update + * @param [in] mem_pool Memory pool for update as the current on the specified device + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool); +/** + * @brief Gets the current memory pool for the specified device + * + * Returns the last pool provided to @p hipDeviceSetMemPool for this device + * or the device's default memory pool if @p hipDeviceSetMemPool has never been called. + * By default the current mempool is the default mempool for a device, + * otherwise the returned pool must have been set with @p hipDeviceSetMemPool. + * + * @param [out] mem_pool Current memory pool on the specified device + * @param [in] device Device index to query the current memory pool + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipDeviceGetDefaultMemPool, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + */ +hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device); +/** + * @brief Returns device properties. 
+ * + * @param [out] prop written with device properties + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * @bug HIP-Clang always returns 0 for maxThreadsPerMultiProcessor + * @bug HIP-Clang always returns 0 for regsPerBlock + * @bug HIP-Clang always returns 0 for l2CacheSize + * + * Populates hipGetDeviceProperties with information for the specified device. + */ +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); +/** + * @brief Gets the maximum width for 1D linear textures on the specified device + * + * This function queries the maximum width, in elements, of 1D linear textures that can be allocated + * on the specified device. The maximum width depends on the texture element size and the hardware + * limitations of the device. + * + * @param [out] max_width Maximum width, in elements, of 1D linear textures that the device can + * support + * @param [in] desc Requested channel format + * @param [in] device Device index to query for maximum 1D texture width + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + * + * @see hipDeviceGetAttribute, hipMalloc, hipTexRefSetAddressMode + */ +hipError_t hipDeviceGetTexture1DLinearMaxWidth(size_t* max_width, const hipChannelFormatDesc* desc, + int device); +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig Cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotSupported + * + * Note: AMD devices do not support reconfigurable cache. This API is not implemented + * on AMD platform. If the function is called, it will return hipErrorNotSupported. + * + */ +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Get Cache configuration for a specific Device + * + * @param [out] cacheConfig Pointer of cache configuration + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices do not support reconfigurable cache. 
This hint is ignored + * on these architectures. + * + */ +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Gets resource limits of current device + * + * The function queries the size of limit value, as required by the input enum value hipLimit_t, + * which can be either #hipLimitStackSize, or #hipLimitMallocHeapSize. Any other input as + * default, the function will return #hipErrorUnsupportedLimit. + * + * @param [out] pValue Returns the size of the limit in bytes + * @param [in] limit The limit to query + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); +/** + * @brief Sets resource limits of current device. + * + * As the input enum limit, + * #hipLimitStackSize sets the limit value of the stack size on the current GPU device, per thread. + * The limit size can get via hipDeviceGetLimit. The size is in units of 256 dwords, up to the limit + * (128K - 16). + * + * #hipLimitMallocHeapSize sets the limit value of the heap used by the malloc()/free() + * calls. For limit size, use the #hipDeviceGetLimit API. + * + * Any other input as default, the function will return hipErrorUnsupportedLimit. + * + * @param [in] limit Enum of hipLimit_t to set + * @param [in] value The size of limit value in bytes + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * + */ +hipError_t hipDeviceSetLimit(enum hipLimit_t limit, size_t value); +/** + * @brief Returns bank width of shared memory for current device + * + * @param [out] pConfig The pointer of the bank width for shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures.
+ * + */ +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Gets the flags set for current device + * + * @param [out] flags Pointer of the flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipGetDeviceFlags(unsigned int* flags); +/** + * @brief The bank width of shared memory on current device is set + * + * @param [in] config Configuration for the bank width of shared memory + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief The current device behavior is changed according to the flags passed. + * + * @param [in] flags Flag to set on the current device + * + * The schedule flags impact how HIP waits for the completion of a command running on a device. + * + * #hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted + * the work until the command completes. This offers the lowest latency, but will consume a CPU + * core and may increase power. + * + * #hipDeviceScheduleYield : The HIP runtime will yield the CPU to system so that other + * tasks can use it. This may increase latency to detect the completion but will consume less + * power and is friendlier to other tasks in the system. + * + * #hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. + * + * #hipDeviceScheduleAuto : This is the default value if the input 'flags' is zero. + * Uses a heuristic to select between Spin and Yield modes. If the number of HIP contexts is + * greater than the number of logical processors in the system, uses Spin scheduling, otherwise + * uses Yield scheduling. + * + * #hipDeviceMapHost : Allows mapping host memory. On ROCm, this is always allowed and + * the flag is ignored. 
+ * + * #hipDeviceLmemResizeToMax : This flag is silently ignored on ROCm. + * + * @returns #hipSuccess, #hipErrorNoDevice, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess + * + * + */ +hipError_t hipSetDeviceFlags(unsigned flags); +/** + * @brief Device which matches hipDeviceProp_t is returned + * + * @param [out] device Pointer of the device + * @param [in] prop Pointer of the properties + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); +/** + * @brief Returns the link type and hop count between two devices + * + * @param [in] device1 Ordinal for device1 + * @param [in] device2 Ordinal for device2 + * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two + * devices + * @param [out] hopcount Returns the hop count between the two devices + * + * Queries and returns the HSA link type and the hop count between the two specified devices. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, + uint32_t* hopcount); +// TODO: implement IPC apis +/** + * @brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with hipMalloc and exports it for use in another process. This is a + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with hipFree and a subsequent call + * to hipMalloc returns memory with the same device address, + * hipIpcGetMemHandle will return a unique handle for the + * new memory. + * + * @param handle - Pointer to user allocated hipIpcMemHandle to return + * the handle in. 
+ * @param devPtr - Base pointer to previously allocated device memory + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorOutOfMemory, #hipErrorMapFailed + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); +/** + * @brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with hipIpcGetMemHandle into + * the current device address space. For contexts on different devices + * hipIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called hipDeviceEnablePeerAccess. This behavior is + * controlled by the hipIpcMemLazyEnablePeerAccess flag. + * hipDeviceCanAccessPeer can determine if a mapping is possible. + * + * Contexts that may open hipIpcMemHandles are restricted in the following way. + * hipIpcMemHandles from each device in a given process may only be opened + * by one context per device per other process. + * + * Memory returned from hipIpcOpenMemHandle must be freed with + * hipIpcCloseMemHandle. + * + * Calling hipFree on an exported memory region before calling + * hipIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * @param devPtr - Returned device pointer + * @param handle - hipIpcMemHandle to open + * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, + * #hipErrorInvalidDevicePointer + * + * @note During multiple processes, using the same memory handle opened by the current context, + * there is no guarantee that the same device pointer will be returned in @p *devPtr. + * This is different from CUDA. + * @note This IPC memory related feature API on Windows may behave differently from Linux.
+ * + */ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +/** + * @brief Close memory mapped with hipIpcOpenMemHandle + * + * Unmaps memory returned by hipIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. + * + * @param devPtr - Device pointer returned by hipIpcOpenMemHandle + * + * @returns #hipSuccess, #hipErrorMapFailed, #hipErrorInvalidHandle + * + * @note This IPC memory related feature API on Windows may behave differently from Linux. + * + */ +hipError_t hipIpcCloseMemHandle(void* devPtr); + +/** + * @brief Gets an opaque interprocess handle for an event. + * + * This opaque handle may be copied into other processes and opened with hipIpcOpenEventHandle. + * Then hipEventRecord, hipEventSynchronize, hipStreamWaitEvent and hipEventQuery may be used in + * either process. Operations on the imported event after the exported event has been freed with + * hipEventDestroy will result in undefined behavior. + * + * @param[out] handle Pointer to hipIpcEventHandle to return the opaque event handle + * @param[in] event Event allocated with hipEventInterprocess and hipEventDisableTiming flags + * + * @returns #hipSuccess, #hipErrorInvalidConfiguration, #hipErrorInvalidValue + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); + +/** + * @brief Opens an interprocess event handle. + * + * Opens an interprocess event handle exported from another process with hipIpcGetEventHandle. The + * returned hipEvent_t behaves like a locally created event with the hipEventDisableTiming flag + * specified. This event must be freed with hipEventDestroy.
Operations on the imported event after + * the exported event has been freed with hipEventDestroy will result in undefined behavior. If the + * function is called within the same process where handle is returned by hipIpcGetEventHandle, it + * will return hipErrorInvalidContext. + * + * @param[out] event Pointer to hipEvent_t to return the event + * @param[in] handle The opaque interprocess handle to open + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext + * + * @note This IPC event related feature API is currently applicable on Linux. + * + */ +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); + +// end doxygen Device +/** + * @} + */ +/** + * + * @defgroup Execution Execution Control + * @{ + * This section describes the execution control functions of HIP runtime API. + * + */ +/** + * @brief Set attribute for a specific function + * + * @param [in] func Pointer of the function + * @param [in] attr Attribute to set + * @param [in] value Value to set + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value); +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] func Pointer of the function. + * @param [in] config Configuration to set. + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. 
+ * + */ +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); +/** + * @brief Set shared memory configuration for a specific function + * + * @param [in] func Pointer of the function + * @param [in] config Configuration + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config); +// doxygen end execution +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Error Error Handling + * @{ + * This section describes the error handling functions of HIP runtime API. + */ +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipExtGetLastError(void); + +/** + * @brief Return last error returned by any HIP runtime API call.
+ * + * @returns #hipSuccess + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread. Unlike hipGetLastError, this function does not reset the saved error code. + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipPeekAtLastError(void); +/** + * @brief Return hip error as text string form. + * + * @param hip_error Error code to convert to name. + * @returns const char pointer to the NULL-terminated error name + * + * @see hipGetErrorString, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorName(hipError_t hip_error); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param hipError Error code to convert to string. + * @returns const char pointer to the NULL-terminated error string + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +const char* hipGetErrorString(hipError_t hipError); +/** + * @brief Return hip error as text string form. + * + * @param [in] hipError Error code to convert to string. + * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorName(hipError_t hipError, const char** errorString); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param [in] hipError Error code to convert to string.
+ * @param [out] errorString char pointer to the NULL-terminated error string + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipGetErrorName, hipGetLastError, hipPeekAtLastError, hipError_t + */ +hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString); +// end doxygen Error +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Stream Management + * @{ + * This section describes the stream management functions of HIP runtime API. + * The following Stream APIs are not (yet) supported in HIP: + * - hipStreamAttachMemAsync is a nop + * - hipDeviceGetStreamPriorityRange returns #hipSuccess + */ + +/** + * @brief Creates an asynchronous stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device. The @p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, the application must call + * hipStreamDestroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreate(hipStream_t* stream); +/** + * @brief Creates an asynchronous stream with flag. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with its associated current device.
@p stream returns an + * opaque handle that can be used to reference the newly created stream in subsequent hipStream* + * commands. The stream is allocated on the heap and will remain allocated even if the handle + * goes out-of-scope. To release the memory used by the stream, application must call + * hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, + * hipStreamDestroy. + * + */ +hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); +/** + * @brief Creates an asynchronous stream with the specified priority. + * + * @param[in, out] stream Pointer to new stream + * @param[in] flags Parameters to control stream creation + * @param[in] priority Priority of the stream. Lower numbers represent higher priorities. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified priority, with its associated current + * device. + * @p stream returns an opaque handle that can be used to reference the newly created stream in + * subsequent hipStream* commands. The stream is allocated on the heap and will remain allocated + * even if the handle goes out-of-scope. To release the memory used by the stream, application must + * call hipStreamDestroy. + * + * The @p flags parameter controls behavior of the stream. The valid values are #hipStreamDefault + * and #hipStreamNonBlocking. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); +/** + * @brief Returns numerical values that correspond to the least and greatest stream priority. + * + * @param[in, out] leastPriority Pointer in which a value corresponding to least priority + * is returned. 
+ * @param[in, out] greatestPriority Pointer in which a value corresponding to greatest priority + * is returned. + * @returns #hipSuccess + * + * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the + * least and greatest stream priority respectively. Stream priorities follow a convention where + * lower numbers imply greater priorities. The range of meaningful stream priorities is given by + * [*leastPriority,*greatestPriority]. If the user attempts to create a stream with a priority + * value that is outside the meaningful range as specified by this API, the priority is + * automatically clamped to within the valid range. + * + * @warning This API is under development on AMD GPUs and simply returns #hipSuccess. + */ +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); +/** + * @brief Destroys the specified stream. + * + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidHandle + * + * Destroys the specified stream. + * + * If commands are still executing on the specified stream, some may complete execution before the + * queue is deleted. + * + * The queue may be destroyed while some commands are still inflight, or may wait for all commands + * queued to the stream before destroying it. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, + * hipStreamWaitEvent, hipStreamSynchronize + */ +hipError_t hipStreamDestroy(hipStream_t stream); +/** + * @brief Returns #hipSuccess if all of the operations in the specified @p stream have completed, or + * #hipErrorNotReady if not. + * + * @param[in] stream Stream to query + * + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other + * host threads are sending work to the stream, the status may change immediately after the function + * is called.
It is typically used for debug. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamQuery(hipStream_t stream); +/** + * @brief Waits for all commands in the stream to complete. + * + * @param[in] stream Stream identifier. + * + * @returns #hipSuccess, #hipErrorInvalidHandle + * + * This command is host-synchronous : the host will block until all operations on the specified + * stream with its associated device are completed. On multiple device systems, the @p stream is + * associated with its device, no need to call hipSetDevice before this API. + * + * This command follows standard null-stream semantics. Specifying the null stream will cause the + * command to wait for other streams on the same device to complete all pending operations. + * + * This command honors the #hipDeviceScheduleBlockingSync flag, which controls whether the wait is + * active or blocking. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, + * hipStreamDestroy + * + */ +hipError_t hipStreamSynchronize(hipStream_t stream); +/** + * @brief Makes the specified compute stream wait for the specified event + * + * @param[in] stream Stream to make wait + * @param[in] event Event to wait on + * @param[in] flags Parameters to control the operation + * + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorStreamCaptureIsolation + * + * This function inserts a wait operation into the specified stream. + * All future work submitted to @p stream will wait until @p event reports completion before + * beginning execution. + * + * Flags include: + * hipEventWaitDefault: Default event creation flag. + * hipEventWaitExternal: Wait is captured in the graph as an external event node when + * performing stream capture + * + * This function only waits for commands in the current stream to complete. 
Notably, this function + * does not implicitly wait for commands in the default stream to complete, even if the specified + * stream is created with hipStreamNonBlocking = 0. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, + * hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags __dparm(0)); +/** + * @brief Returns flags associated with this stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); +/** + * @brief Queries the Id of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] streamId Pointer to an unsigned long long in which the stream's id is returned + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithFlags, hipStreamGetFlags, hipStreamCreateWithPriority, hipStreamGetPriority + */ +hipError_t hipStreamGetId(hipStream_t stream, unsigned long long* streamId); +/** + * @brief Queries the priority of a stream. + * + * @param[in] stream Stream to be queried + * @param[in,out] priority Pointer to an integer in which the stream's priority is + * returned + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle. + * + * @see hipStreamCreateWithPriority + */ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); +/** + * @brief Gets the device associated with the stream.
+ * + * @param[in] stream Stream to be queried + * @param[out] device Device associated with the stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorContextIsDestroyed, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorDeinitialized, #hipErrorInvalidContext + * + * @see hipStreamCreate, hipStreamDestroy, hipDeviceGetStreamPriorityRange + */ +hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device); +/** + * @brief Creates an asynchronous stream with the specified CU mask. + * + * @param[in, out] stream Pointer to new stream + * @param[in] cuMaskSize Size of CU mask bit array passed in. + * @param[in] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU. + * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical + * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored. + * It is user's responsibility to make sure the input is meaningful. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * Creates a new asynchronous stream with the specified CU mask. @p stream returns an opaque + * handle that can be used to reference the newly created stream in subsequent hipStream* commands. + * The stream is allocated on the heap and will remain allocated even if the handle goes + * out-of-scope. To release the memory used by the stream, application must call hipStreamDestroy. + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, + const uint32_t* cuMask); +/** + * @brief Gets CU mask associated with an asynchronous stream + * + * @param[in] stream Stream to be queried + * @param[in] cuMaskSize Number of the block of memories (uint32_t *) allocated by user + * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which + * the stream's CU mask is returned. 
The CU mask is returned in chunks of 32 bits where + * each active bit represents one active CU. + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask); +/** + * Stream callback function pointer type + */ +typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); +/** + * @brief Adds a callback to be called on the host after all currently enqueued items in the stream + * have completed. For each hipStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @returns #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); + +/** + * @brief Sets stream attribute. Updated attribute is applied to work submitted to the stream. + * @param[in] stream - Stream to set attributes to + * @param[in] attr - Attribute ID for the attribute to set + * @param[in] value - Attribute value for the attribute to set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue* value); + +/** + * @brief Queries stream attribute.
+ * @param[in] stream - Stream to get attributes from + * @param[in] attr - Attribute ID for the attribute to query + * @param[out] value_out - Attribute value output + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + */ +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue* value_out); + +/** + * @brief Copies attributes from source stream to destination stream. + * @param[in] dst - Destination stream + * @param[in] src - Source stream + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipStreamCopyAttributes(hipStream_t dst, hipStream_t src); + +// end doxygen Stream +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamM Stream Memory Operations + * @{ + * This section describes Stream Memory Wait and Write functions of HIP runtime API. + */ + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using #hipMallocSignalMemory flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor + * @param [in] mask - Mask to be applied on value at memory before it is compared with value, + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true.
+ * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'. + * + * @note Support for #hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)); + +/** + * @brief Enqueues a wait command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are #hipStreamWaitValueGte + * #hipStreamWaitValueEq, #hipStreamWaitValueAnd and #hipStreamWaitValueNor. + * @param [in] mask - Mask to be applied on value at memory before it is compared with value + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. 
+ * + * #hipStreamWaitValueGte: waits until *ptr&mask >= value + * + * #hipStreamWaitValueEq : waits until *ptr&mask == value + * + * #hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * + * #hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using #hipStreamWaitValueNor, mask is applied on both 'value' and '*ptr'. + * + * @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ + +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); + +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags); +/** + * @brief Enqueues a write command to the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ + +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags); + +/** + * @brief Enqueues an array of stream memory operations in the stream.[BETA] + * + * @param [in] stream - Stream identifier + * @param [in] count - The number of operations in the array. Must be less than 256 + * @param [in] paramArray - The types and parameters of the individual operations. + * @param [in] flags - Reserved for future expansion; must be 0. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Batch operations to synchronize the stream via memory operations. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. 
hipStreamWriteValue64 + */ + +hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, + hipStreamBatchMemOpParams* paramArray, unsigned int flags); + +/** + * @brief Creates a batch memory operation node and adds it to a graph.[BETA] + * + * @param [out] phGraphNode - Returns the newly created node + * @param [in] hGraph - Graph to which to add the node + * @param [in] dependencies - Dependencies of the node + * @param [in] numDependencies - Number of dependencies + * @param [in] nodeParams - Parameters for the node + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64, hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphAddBatchMemOpNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Returns a batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to get the parameters for + * @param [out] nodeParams_out - Pointer to return the parameters + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Returns the parameters of batch mem op node hNode in nodeParams_out. + * The paramArray returned in nodeParams_out is owned by the node. + * This memory remains valid until the node is destroyed or its parameters are modified, + * and should not be modified directly. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64, hipStreamWriteValue64,
hipGraphBatchMemOpNodeSetParams + */ + +hipError_t hipGraphBatchMemOpNodeGetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams_out); + +/** + * @brief Sets the batch mem op node's parameters.[BETA] + * + * @param [in] hNode - Node to set the parameters for + * @param [in] nodeParams - Parameters to copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of batch mem op node hNode to nodeParams. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. hipStreamWriteValue64, hipGraphBatchMemOpNodeGetParams + */ + +hipError_t hipGraphBatchMemOpNodeSetParams(hipGraphNode_t hNode, + hipBatchMemOpNodeParams* nodeParams); + +/** + * @brief Sets the parameters for a batch mem op node in the given graphExec.[BETA] + * + * @param [in] hGraphExec - The executable graph in which to set the specified node + * @param [in] hNode - Batch mem op node from the graph from which graphExec was instantiated + * @param [in] nodeParams - Updated Parameters to set + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Sets the parameters of a batch mem op node in an executable graph hGraphExec. + * The node is identified by the corresponding node hNode in the non-executable graph, + * from which the executable graph was instantiated. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + * + * @see hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64. 
hipStreamWriteValue64, hipStreamBatchMemOp + */ +hipError_t hipGraphExecBatchMemOpNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipBatchMemOpNodeParams* nodeParams); + +// end doxygen Stream Memory Operations +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Event Event Management + * @{ + * This section describes the event management functions of HIP runtime API. + */ +/** + * @brief Create an event with the specified flags + * + * @param[in,out] event Returns the newly created event. + * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, + #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess + * #hipEventDefault : Default flag. The event will use active synchronization and will support + timing. Active synchronization provides lowest possible latency at the expense of dedicating a + CPU to poll on the event. + * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is + called on this event, the thread will block until the event completes. This can increase latency + for the synchronization but can result in lower power and more resources for other CPU threads. + * #hipEventDisableTiming : Disable recording of timing information. Events created with this flag + would not record profiling data and provide best performance if used for synchronization. + * #hipEventInterprocess : The event can be used as an interprocess event. hipEventDisableTiming + flag also must be set when hipEventInterprocess flag is set. + * #hipEventDisableSystemFence : Disable acquire and release system scope fence. This may + improve performance but device memory may not be visible to the host and other devices + if this flag is set.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); +/** + * @brief Create an event + * + * @param[in,out] event Returns the newly created event. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreate(hipEvent_t* event); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @param[in] flags parameter for operations + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition + * from the "recording" to the "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * Flags include: + * hipEventRecordDefault: Default event creation flag. + * hipEventRecordExternal: Event is captured in the graph as an external event node when + * performing stream capture + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event.
+ * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note If this function is not called before using hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream __dparm(0), + unsigned int flags __dparm(0)); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition + * from the "recording" to the "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @note If this function is not called before using hipEventQuery() or hipEventSynchronize(), + * #hipSuccess is returned, meaning no pending event in the stream.
+ * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +#ifdef __cplusplus +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); +#endif +/** + * @brief Destroy the specified event. + * + * @param[in] event Event to destroy. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure + * + * Releases memory associated with the event. If the event is recording but has not completed + * recording when hipEventDestroy() is called, the function will return immediately and the + * completion_future resources will be released later, when the hipDevice is synchronized. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, + * hipEventElapsedTime + * + * @returns #hipSuccess + */ +hipError_t hipEventDestroy(hipEvent_t event); +/** + * @brief Wait for an event to complete. + * + * This function will block until the event is ready, waiting for all previous work in the stream + * specified when event was recorded with hipEventRecord(). + * + * If hipEventRecord() has not been called on @p event, this function returns #hipSuccess when no + * event is captured. + * + * + * @param[in] event Event on which to wait. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventElapsedTime + */ +hipError_t hipEventSynchronize(hipEvent_t event); +/** + * @brief Return the elapsed time between two events. + * + * @param[out] ms : Return time between start and stop in ms. + * @param[in] start : Start event. + * @param[in] stop : Stop event. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Computes the elapsed time between two events. Time is computed in ms, with + * a resolution of approximately 1 us. + * + * Events which are recorded in a NULL stream will block until all commands + * on all other streams complete execution, and then record the timestamp. + * + * Events which are recorded in a non-NULL stream will record their timestamp + * when they reach the head of the specified stream, after all previous + * commands in that stream have completed executing. Thus the time that + * the event recorded may be significantly after the host calls hipEventRecord(). + * + * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is + * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been + * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at + * least one of the events), then #hipErrorNotReady is returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventSynchronize + */ +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); +/** + * @brief Query event status + * + * @param[in] event Event to query. + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Query the status of the specified event. This function will return #hipSuccess if all + * commands in the appropriate stream (specified to hipEventRecord()) have completed. If any + * execution has not completed, then #hipErrorNotReady is returned. + * + * @note This API returns #hipSuccess, if hipEventRecord() is not called before this API. 
+ * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, + * hipEventSynchronize, hipEventElapsedTime + */ +hipError_t hipEventQuery(hipEvent_t event); +// end doxygen Events +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Memory Management + * @{ + * This section describes the memory management functions of HIP runtime API. + * The following CUDA APIs are not currently supported: + * - cudaMalloc3D + * - cudaMalloc3DArray + * - TODO - more 2D, 3D, array APIs here. + * + * + */ + +/** + * @brief Sets information on the specified pointer.[BETA] + * + * @param [in] value Sets pointer attribute value + * @param [in] attribute Attribute to set + * @param [in] ptr Pointer to set attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + */ +hipError_t hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr); + + +/** + * @brief Returns attributes for the specified pointer + * + * @param [out] attributes attributes for the specified pointer + * @param [in] ptr pointer to get attributes for + * + * The output parameter 'attributes' has a member named 'type' that describes what memory the + * pointer is associated with, such as device memory, host memory, managed memory, and others. + * Otherwise, the API cannot handle the pointer and returns #hipErrorInvalidValue. + * + * @note The unrecognized memory type is unsupported to keep the HIP functionality backward + * compatibility due to #hipMemoryType enum values. 
+ * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @note The current behavior of this HIP API corresponds to the CUDA API before version 11.0. + * + * @see hipPointerGetAttribute + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in, out] data Returned pointer attribute value + * @param [in] attribute Attribute to query for + * @param [in] ptr Pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @see hipPointerGetAttributes + */ +hipError_t hipPointerGetAttribute(void* data, hipPointer_attribute attribute, hipDeviceptr_t ptr); +/** + * @brief Returns information about the specified pointer.[BETA] + * + * @param [in] numAttributes number of attributes to query for + * @param [in] attributes attributes to query for + * @param [in, out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] ptr pointer to get attributes for + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues.
+ * + * @see hipPointerGetAttribute + */ +hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, hipPointer_attribute* attributes, + void** data, hipDeviceptr_t ptr); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup External External Resource Interoperability + * @{ + * @ingroup API + * + * This section describes the external resource interoperability functions of HIP runtime API. + * + */ +/** + * @brief Imports an external semaphore. + * + * @param[out] extSem_out Returned handle to the imported external semaphore + * @param[in] semHandleDesc Semaphore import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc); +/** + * @brief Signals a set of external semaphore objects. + * + * @param[in] extSemArray External semaphores to be signaled + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to signal + * @param[in] stream Stream to enqueue the signal operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux.
+ * + */ +hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Waits on a set of external semaphore objects + * + * @param[in] extSemArray External semaphores to be waited on + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to wait on + * @param[in] stream Stream to enqueue the wait operations in + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Destroys an external semaphore object and releases any references to the underlying + * resource. Any outstanding signals or waits must have completed before the semaphore is destroyed. + * + * @param[in] extSem Handle to the external semaphore object to be destroyed + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + * @note This API is currently not supported on Linux. + * + */ +hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem); + +/** + * @brief Imports an external memory object. + * + * @param[out] extMem_out Returned handle to an external memory object + * @param[in] memHandleDesc Memory import handle descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + * + */ +hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, + const hipExternalMemoryHandleDesc* memHandleDesc); +/** + * @brief Maps a buffer onto an imported memory object.
+ * + * @param[out] devPtr Returned device pointer to buffer + * @param[in] extMem Handle to external memory object + * @param[in] bufferDesc Buffer descriptor + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipExternalMemoryGetMappedBuffer(void** devPtr, hipExternalMemory_t extMem, + const hipExternalMemoryBufferDesc* bufferDesc); +/** + * @brief Destroys an external memory object. + * + * @param[in] extMem External memory object to be destroyed + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem); +/** + * @brief Maps a mipmapped array onto an external memory object. + * + * @param[out] mipmap mipmapped array to return + * @param[in] extMem external memory object handle + * @param[in] mipmapDesc external mipmapped array descriptor + * + * Returned mipmapped array must be freed using hipFreeMipmappedArray. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle + * + * @see hipImportExternalMemory, hipDestroyExternalMemory, hipExternalMemoryGetMappedBuffer, + * hipFreeMipmappedArray + */ +hipError_t hipExternalMemoryGetMappedMipmappedArray( + hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem, + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc); +// end of external resource +/** + * @} + */ +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipMalloc(void** ptr, size_t size); +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] sizeBytes Requested memory size + * @param[in] flags Type of memory allocation + * + * If requested memory size is 0, no memory is allocated, *ptr returns nullptr, and #hipSuccess + * is returned. + * + * The memory allocation flag should be either #hipDeviceMallocDefault, + * #hipDeviceMallocFinegrained, #hipDeviceMallocUncached, or #hipMallocSignalMemory. + * If the flag is any other value, the API returns #hipErrorInvalidValue. + * + * @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags); + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryD Memory Management [Deprecated] + * @ingroup Memory + * @{ + * This section describes the deprecated memory management functions of HIP runtime API. + * + */ + +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
+ * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMallocHost(void** ptr, size_t size); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @warning This API is deprecated, use hipHostMalloc() instead + */ +HIP_DEPRECATED("use hipHostMalloc instead") +hipError_t hipMemAllocHost(void** ptr, size_t size); +// end doxygen deprecated management memory +/** + * @} + */ +/** + * @brief Allocates device accessible page locked (pinned) host memory + * + * This API allocates pinned host memory which is mapped into the address space of all GPUs + * in the system, the memory can be accessed directly by the GPU device, and can be read or + * written with much higher bandwidth than pageable memory obtained with functions such as + * malloc(). + * + * Using the pinned host memory, applications can implement faster data transfers for HostToDevice + * and DeviceToHost. The runtime tracks the hipHostMalloc allocations and can avoid some of the + * setup required for regular unpinned memory. + * + * When the memory accesses are infrequent, zero-copy memory can be a good choice, for coherent + * allocation. GPU can directly access the host memory over the CPU/GPU interconnect, without need + * to copy the data. + * + * Currently the allocation granularity is 4KB for the API. + * + * Developers need to choose proper allocation flag with consideration of synchronization. + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. 
+ * @param[in] flags Type of host memory allocation. See the description of flags in + * hipSetDeviceFlags. + * + * If no input for flags, it will be the default pinned memory allocation on the host. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * + * @see hipSetDeviceFlags, hipHostFree + */ +hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup MemoryM Managed Memory + * + * @ingroup Memory + * @{ + * This section describes the managed memory management functions of HIP runtime API. + * + * @note The managed memory management APIs are implemented on Linux, under development + * on Windows. + * + */ +/** + * @brief Allocates memory that will be automatically managed by HIP. + * + * This API is used for managed memory, allows data to be shared and accessible to both CPU and + * GPU using a single pointer. + * + * The API returns the allocation pointer, managed by HMM, can be used further to execute kernels + * on device and fetch data between the host and device as needed. + * + * If HMM is not supported, the function behaves the same as @p hipMallocHost . + * + * @note It is recommended to do the capability check before calling this API. + * + * @param [out] dev_ptr - pointer to allocated device memory + * @param [in] size - requested allocation size in bytes, it should be granularity of 4KB + * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost + * (defaults to hipMemAttachGlobal) + * + * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipMallocManaged(void** dev_ptr, size_t size, + unsigned int flags __dparm(hipMemAttachGlobal)); +/** + * @brief Prefetches memory to the specified destination device using HIP.
+ * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] device destination device to prefetch to + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, + hipStream_t stream __dparm(0)); +/** + * @brief Prefetches memory to the specified destination device using HIP. + * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] location destination location to prefetch to + * @param [in] flags flags for future use, must be zero now. + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPrefetchAsync_v2(const void* dev_ptr, size_t count, hipMemLocation location, + unsigned int flags, hipStream_t stream __dparm(0)); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] device device to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes.
+ * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, int device); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range, it should be CPU page size aligned. + * @param [in] advice advice to be applied for the specified memory range + * @param [in] location location to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * This HIP API advises about the usage to be applied on unified memory allocation in the + * range starting from the pointer address dev_ptr, with the size of count bytes. + * The memory range must refer to managed memory allocated via the API hipMallocManaged, and the + * range will be handled with proper round down and round up respectively in the driver to + * be aligned to CPU page size, the same way as corresponding CUDA API behaves in CUDA version 8.0 + * and afterwards. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAdvise_v2(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + hipMemLocation location); +/** + * @brief Query an attribute of a given memory range in HIP.
+ * + * @param [in,out] data a pointer to a memory location where the result of each + * attribute query will be written to + * @param [in] data_size the size of data + * @param [in] attribute the attribute to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, hipMemRangeAttribute attribute, + const void* dev_ptr, size_t count); +/** + * @brief Query attributes of a given memory range in HIP. + * + * @param [in,out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] data_sizes an array, containing the sizes of each result + * @param [in] attributes an array of attributes to query (num_attributes and the number + * of attributes in this array should match) + * @param [in] num_attributes the number of attributes to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, size_t num_attributes, + const void* dev_ptr, size_t count); +/** + * @brief Attach memory to a stream asynchronously in HIP.
+ * + * @param [in] stream - stream in which to enqueue the attach operation + * @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or + * to a valid host-accessible region of system-allocated memory) + * @param [in] length - length of memory (defaults to zero) + * @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or + * hipMemAttachSingle (defaults to hipMemAttachSingle) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is under development. Currently it is a no-operation (NOP) + * function on AMD GPUs and returns #hipSuccess. + */ +hipError_t hipStreamAttachMemAsync(hipStream_t stream, void* dev_ptr, size_t length __dparm(0), + unsigned int flags __dparm(hipMemAttachSingle)); +// end doxygen Managed Memory +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup StreamO Stream Ordered Memory Allocator + * @{ + * @ingroup Memory + * This section describes Stream Ordered Memory Allocator functions of HIP runtime API. + * + * The asynchronous allocator allows the user to allocate and free in stream order. + * All asynchronous accesses of the allocation must happen between the stream executions of + * the allocation and the free. If the memory is accessed outside of the promised stream order, + * a use before allocation / use after free error will cause undefined behavior. + * + * The allocator is free to reallocate the memory as long as it can guarantee that compliant memory + * accesses will not overlap temporally. The allocator may refer to internal stream ordering as well + * as inter-stream dependencies (such as HIP events and null stream dependencies) when establishing + * the temporal guarantee. The allocator may also insert inter-stream dependencies to establish + * the temporal guarantee. 
Whether or not a device supports the integrated stream ordered memory + * allocator may be queried by calling @p hipDeviceGetAttribute with the device attribute + * @p hipDeviceAttributeMemoryPoolsSupported + * + * @note APIs in this section are implemented on Linux, under development on Windows. + */ + +/** + * @brief Allocates memory with stream ordered semantics + * + * Inserts a memory allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in *dptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the memory pool associated with the stream's device. + * + * @note The default memory pool of a device contains device memory from that device. + * @note Basic stream ordering allows future work submitted into the same stream to use the + * allocation. Stream query, stream synchronize, and HIP events can be used to guarantee that + * the allocation operation completes before work submitted in a separate stream runs. + * @note During stream capture, this function results in the creation of an allocation node. + * In this case, the allocation is owned by the graph instead of the memory pool. The memory + * pool's properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer of memory allocation + * @param [in] size Number of bytes to allocate + * @param [in] stream The stream establishing the stream ordering contract and + * the memory pool to allocate from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocFromPoolAsync, hipFreeAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream); +/** + * @brief Frees memory with stream ordered semantics + * + * Inserts a free operation into @p stream. + * The allocation must not be used after stream execution reaches the free. + * After this API returns, accessing the memory from any subsequent work launched on the GPU + * or querying its pointer attributes results in undefined behavior. + * + * @note During stream capture, this function results in the creation of a free node and + * must therefore be passed the address of a graph allocation. + * + * @param [in] dev_ptr Pointer to device memory to free + * @param [in] stream The stream, where the destruction will occur according to the execution order + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipMemPoolTrimTo, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream); +/** + * @brief Releases freed memory back to the OS + * + * Releases memory back to the OS until the pool contains fewer than @p min_bytes_to_hold + * reserved bytes, or there is no more memory that the allocator can safely release. + * The allocator cannot release OS allocations that back outstanding asynchronous allocations. + * The OS allocations may happen at different granularity from the user allocations. + * + * @note Allocations that have not been freed count as outstanding.
+ * @note Allocations that have been asynchronously freed but whose completion has + * not been observed on the host (eg. by a synchronize) can count as outstanding. + * + * @param[in] mem_pool The memory pool to trim allocations + * @param[in] min_bytes_to_hold If the pool has less than min_bytes_to_hold reserved, + * then the TrimTo operation is a no-op. Otherwise the memory pool will contain + * at least min_bytes_to_hold bytes reserved after the operation. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold); +/** + * @brief Sets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to modify + * @param [in] attr The attribute to modify + * @param [in] value Pointer to the value to assign + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Gets attributes of a memory pool + * + * Supported attributes are: + * - @p hipMemPoolAttrReleaseThreshold: (value type = cuuint64_t) + * Amount of reserved memory in bytes to hold onto before trying + * to release memory back to the OS. When more than the release + * threshold bytes of memory are held by the memory pool, the + * allocator will try to release memory back to the OS on the + * next call to stream, event or context synchronize. (default 0) + * - @p hipMemPoolReuseFollowEventDependencies: (value type = int) + * Allow @p hipMallocAsync to use memory asynchronously freed + * in another stream as long as a stream ordering dependency + * of the allocating stream on the free action exists. + * HIP events and null stream interactions can create the required + * stream ordered dependencies. 
(default enabled) + * - @p hipMemPoolReuseAllowOpportunistic: (value type = int) + * Allow reuse of already completed frees when there is no + * dependency between the free and allocation. (default enabled) + * - @p hipMemPoolReuseAllowInternalDependencies: (value type = int) + * Allow @p hipMallocAsync to insert new stream dependencies + * in order to establish the stream ordering required to reuse + * a piece of memory released by @p hipFreeAsync (default enabled). + * + * @param [in] mem_pool The memory pool to get attributes of + * @param [in] attr The attribute to get + * @param [out] value Retrieved value + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value); +/** + * @brief Controls visibility of the specified pool between devices + * + * @param [in] mem_pool Memory pool for access change + * @param [in] desc_list Array of access descriptors. Each descriptor instructs the access to + * enable for a single gpu + * @param [in] count Number of descriptors in the map array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows.
+ */ +hipError_t hipMemPoolSetAccess(hipMemPool_t mem_pool, const hipMemAccessDesc* desc_list, + size_t count); +/** + * @brief Returns the accessibility of a pool from a device + * + * Returns the accessibility of the pool's memory from the specified location. + * + * @param [out] flags Accessibility of the memory pool from the specified location/device + * @param [in] mem_pool Memory pool being queried + * @param [in] location Location/device for memory pool access + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolGetAccess(hipMemAccessFlags* flags, hipMemPool_t mem_pool, + hipMemLocation* location); +/** + * @brief Creates a memory pool + * + * Creates a HIP memory pool and returns the handle in @p mem_pool. The @p pool_props determines + * the properties of the pool such as the backing device and IPC capabilities. + * + * By default, the memory pool will be accessible from the device it is allocated on. + * + * @param [out] mem_pool Contains the created memory pool + * @param [in] pool_props Memory pool properties + * + * @note Specifying hipMemHandleTypeNone creates a memory pool that will not support IPC. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolDestroy, hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues.
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props); +/** + * @brief Destroys the specified memory pool + * + * If any pointers obtained from this pool haven't been freed or + * the pool has free operations that haven't completed + * when @p hipMemPoolDestroy is invoked, the function will return immediately and the + * resources associated with the pool will be released automatically + * once there are no more outstanding allocations. + * + * Destroying the current mempool of a device sets the default mempool of + * that device as the current mempool for that device. + * + * @param [in] mem_pool Memory pool for destruction + * + * @note A device's default memory pool cannot be destroyed. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipMallocFromPoolAsync, hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, + * hipMemPoolCreate hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, + * hipMemPoolSetAccess, hipMemPoolGetAccess + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool); +/** + * @brief Allocates memory from a specified pool with stream ordered semantics. + * + * Inserts an allocation operation into @p stream. + * A pointer to the allocated memory is returned immediately in @p dev_ptr. + * The allocation must not be accessed until the allocation operation completes. + * The allocation comes from the specified memory pool. + * + * @note The specified memory pool may be from a device different than that of the specified @p + * stream. + * + * Basic stream ordering allows future work submitted into the same stream to use the allocation. 
+ * Stream query, stream synchronize, and HIP events can be used to guarantee that the allocation + * operation completes before work submitted in a separate stream runs. + * + * @note During stream capture, this function results in the creation of an allocation node. In this + * case, the allocation is owned by the graph instead of the memory pool. The memory pool's + * properties are used to set the node's creation parameters. + * + * @param [out] dev_ptr Returned device pointer + * @param [in] size Number of bytes to allocate + * @param [in] mem_pool The pool to allocate from + * @param [in] stream The stream establishing the stream ordering semantic + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @see hipMallocAsync, hipFreeAsync, hipMemPoolGetAttribute, hipMemPoolCreate + * hipMemPoolTrimTo, hipDeviceSetMemPool, hipMemPoolSetAttribute, hipMemPoolSetAccess, + * hipMemPoolGetAccess, + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMallocFromPoolAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream); +/** + * @brief Exports a memory pool to the requested handle type. + * + * Given an IPC capable mempool, create an OS handle to share the pool with another process. + * A recipient process can convert the shareable handle into a mempool with @p + * hipMemPoolImportFromShareableHandle. Individual pointers can then be shared with the @p + * hipMemPoolExportPointer and @p hipMemPoolImportPointer APIs. The implementation of what the + * shareable handle is and how it can be transferred is defined by the requested handle type. + * + * @note To create an IPC capable mempool, create a mempool with a @p hipMemAllocationHandleType + * other than @p hipMemHandleTypeNone. 
+ * + * @param [out] shared_handle Pointer to the location in which to store the requested handle + * @param [in] mem_pool Pool to export + * @param [in] handle_type The type of handle to create + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportFromShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Imports a memory pool from a shared handle. + * + * Specific allocations can be imported from the imported pool with @p hipMemPoolImportPointer. + * + * @note Imported memory pools do not support creating new allocations. + * As such imported memory pools may not be used in @p hipDeviceSetMemPool + * or @p hipMallocFromPoolAsync calls. + * + * @param [out] mem_pool Returned memory pool + * @param [in] shared_handle OS handle of the pool to open + * @param [in] handle_type The type of handle being imported + * @param [in] flags Must be 0 + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolExportToShareableHandle + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportFromShareableHandle(hipMemPool_t* mem_pool, void* shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags); +/** + * @brief Export data to share a memory pool allocation between processes. + * + * Constructs @p export_data for sharing a specific allocation from an already shared memory pool. 
+ * The recipient process can import the allocation with the @p hipMemPoolImportPointer api. + * The data is not a handle and may be shared through any IPC mechanism. + * + * @param[out] export_data Returned export data + * @param[in] dev_ptr Pointer to memory being exported + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipMemPoolImportPointer + * + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* dev_ptr); +/** + * @brief Import a memory pool allocation from another process. + * + * Returns in @p dev_ptr a pointer to the imported memory. + * The imported memory must not be accessed before the allocation operation completes + * in the exporting process. The imported memory must be freed from all importing processes before + * being freed in the exporting process. The pointer may be freed with @p hipFree + * or @p hipFreeAsync. If @p hipFreeAsync is used, the free must be completed + * on the importing process before the free operation on the exporting process. + * + * @note The @p hipFreeAsync api may be used in the exporting process before + * the @p hipFreeAsync operation completes in its stream as long as the + * @p hipFreeAsync in the exporting process specifies a stream with + * a stream dependency on the importing process's @p hipFreeAsync. + * + * @param [out] dev_ptr Pointer to imported memory + * @param [in] mem_pool Memory pool from which to import a pointer + * @param [in] export_data Data specifying the memory to import + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, #hipErrorOutOfMemory + * + * @see hipMemPoolExportPointer + * + * @warning This API is marked as Beta. 
While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemPoolImportPointer(void** dev_ptr, hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data); +/** + * @brief Sets memory pool for memory location and allocation type. + * + * + */ +hipError_t hipMemSetMemPool(hipMemLocation* location, hipMemAllocationType type, hipMemPool_t pool); +/** + * @brief Retrieves memory pool for memory location and allocation type. + * + * + */ +hipError_t hipMemGetMemPool(hipMemPool_t* pool, hipMemLocation* location, + hipMemAllocationType type); +// Doxygen end of ordered memory allocator +/** + * @} + */ + +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size in bytes + * @param[in] flags Type of host memory allocation see below + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * Flags: + * - #hipHostAllocDefault Default pinned memory allocation on the host. + * - #hipHostAllocPortable Memory is considered allocated by all contexts. + * - #hipHostAllocMapped Map the allocation into the address space for the current device. + * - #hipHostAllocWriteCombined Allocates the memory as write-combined. 
+ * - #hipHostAllocUncached Allocate the host memory on extended fine grained access system + * memory pool + * + * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue + */ +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc + * + * @param[out] devPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostMalloc + * @param[in] flags Flags to be passed for extension + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipSetDeviceFlags, hipHostMalloc + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); +/** + * @brief Return flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostMalloc + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipHostMalloc + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); +/** + * @brief Register host memory so it can be accessed from the current device. + * + * @param[in] hostPtr Pointer to host memory to be registered. + * @param[in] sizeBytes Size of the host memory in bytes + * @param[in] flags See below. + * + * Flags: + * - #hipHostRegisterDefault Memory is Mapped and Portable + * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports + * one context so this is always assumed true. + * - #hipHostRegisterMapped Map the allocation into the address space for the current device. + * The device pointer can be obtained with #hipHostGetDevicePointer. + * - #hipExtHostRegisterUncached Map the host memory onto extended fine grained access system + * memory pool. + * + * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer.
+ * On many systems, the mapped device pointer will have a different value than the mapped host + * pointer. Applications must use the device pointer in device code, and the host pointer in host + * code. + * + * On some systems, registered memory is pinned. On some systems, registered memory may not + * actually be pinned but uses OS or hardware facilities to allow GPU access to the host memory. + * + * Developers are strongly encouraged to register memory blocks which are aligned to the host + * cache-line size (typically 64 bytes, but this can be obtained from the CPUID instruction). + * + * If registering non-aligned pointers, the application must take care when registering pointers from + * the same cache line on different devices. HIP's coarse-grained synchronization model does not + * guarantee correct results if different devices write to different parts of the same cache block - + * typically one of the writes will "win" and overwrite data from the other registered memory + * region. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +/** + * @brief Un-register host pointer + * + * @param[in] hostPtr Host pointer previously registered with #hipHostRegister + * @returns Error code + * + * @see hipHostRegister + */ +hipError_t hipHostUnregister(void* hostPtr); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch.
+ * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alignment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] dptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] widthInBytes Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * @param[in] elementSizeBytes The size of each element in bytes, should be 4, 8 or 16 + * + * If size is 0, no memory is allocated, *dptr returns nullptr, and hipSuccess is returned. + * The intended usage of pitch is as a separate parameter of the allocation, used to compute + * addresses within the 2D array. Given the row and column of an array element of type T, the + * address is computed as: T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * + * @returns Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, + unsigned int elementSizeBytes); +/** + * @brief Free memory allocated by the HIP-Clang hip memory allocation API.
+ * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess + * @returns #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated + * with hipHostMalloc) + * + * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipFree(void* ptr); +/** + * @brief Frees page-locked memory + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated + * with hipMalloc) + * + */ +hipError_t hipFreeHost(void* ptr); +/** + * @brief Free memory allocated by the HIP-Clang hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @ingroup MemoryD + * + * @param[in] ptr Pointer to memory to be freed + * @returns #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + * hipMalloc) + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + * + */ +hipError_t hipHostFree(void* ptr); +/** + * @brief Copy data from src to dst. + * + * It supports memory from host to device, + * device to host, device to device and host to host + * The src and dst must not overlap. + * + * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. 
For optimal peer-to-peer copies, the copy + * device must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with + * copy agent as the current device and src/dst as the peerDevice argument). If this is not done, + * the hipMemcpy will still work, but will perform the copy using a staging buffer on the host. + * Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in + * undefined behavior. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); +/** + * @brief Memory copy on the stream. + * It allows single or multiple devices to do memory copy on single or multiple streams. + * The operation is akin to hipMemcpyAsync + hipStreamSynchronize. + * Since it is a sync API, it is not allowed during graph capture.
+ * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Kind of transfer + * @param[in] stream Valid stream + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + * @see hipMemcpy, hipStreamCreate, hipStreamSynchronize, hipStreamDestroy, hipSetDevice, + * hipLaunchKernelGGL + * + */ +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream); +/** + * @brief Copy data from Host to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, const void* src, size_t sizeBytes); +/** + * @brief Copy data from Device to Host + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, 
hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copy data from Device to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copies from one 1D array to device memory. 
+ * + * @param[out] dstDevice Destination device pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount); +/** + * @brief Copies from device memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcDevice Source device pointer + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, + size_t ByteCount); + +/** + * @brief Copies from one 1D array to another. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, hipArray_t srcArray, + size_t srcOffset, size_t ByteCount); +/** + * @brief Copy data from Host to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, const void* src, size_t sizeBytes, + hipStream_t stream); 
+/** + * @brief Copy data from Device to Host asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); +/** + * @brief Copy data from Device to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +/** + * @brief Copies from one 1D array to host 
memory. + * + * @param[out] dstHost Destination pointer + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyAtoHAsync(void* dstHost, hipArray_t srcArray, size_t srcOffset, + size_t ByteCount, hipStream_t stream); +/** + * @brief Copies from host memory to a 1D array. 
+ * + * @param[out] dstArray Destination array + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] ByteCount Size of memory copy in bytes + * @param[in] stream Stream identifier + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, + size_t ByteCount, hipStream_t stream); +/** + * @brief Returns a global pointer from a module. + * @ingroup Module + * + * Returns in *dptr and *bytes the pointer and size of the global of name name located in module + * hmod. If no variable of that name exists, it returns hipErrorNotFound. Both parameters dptr and + * bytes are optional. If one of them is NULL, it is ignored and hipSuccess is returned. + * + * @param[out] dptr Returns global device pointer + * @param[out] bytes Returns global size in bytes + * @param[in] hmod Module to retrieve global from + * @param[in] name Name of global to retrieve + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotFound, #hipErrorInvalidContext + * + */ +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +/** + * @brief Gets device pointer associated with symbol on the device. 
+ * + * @param[out] devPtr pointer to the device associated with the symbol + * @param[in] symbol pointer to the symbol of the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); + + +/** + * @brief Gets the size of the given symbol on the device. + * + * @param[in] symbol pointer to the device symbol + * @param[out] size pointer to the size + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); + +/** + * @brief Gets the pointer of requested HIP driver function. + * + * @param[in] symbol The Symbol name of the driver function to request. + * @param[out] pfn Output pointer to the requested driver function. + * @param[in] hipVersion The HIP version for the requested driver function symbol. + * HIP version is defined as 100*version_major + version_minor. For example, in HIP 6.1, the + * hipversion is 601, for the symbol function "hipGetDeviceProperties", the specified hipVersion 601 + * is greater or equal to the version 600, the symbol function will be handled properly as backend + * compatible function. + * + * @param[in] flags Currently only default flag is supported. + * @param[out] symbolStatus Optional enumeration for returned status of searching for symbol driver + * function based on the input hipVersion. + * + * Returns hipSuccess if the returned pfn is addressed to the pointer of found driver function. + * + * @returns #hipSuccess, #hipErrorInvalidValue. + */ +hipError_t hipGetProcAddress(const char* symbol, void** pfn, int hipVersion, uint64_t flags, + hipDriverProcAddressQueryResult* symbolStatus); + +/** + * @brief Copies data to the given symbol on the device. + * Symbol HIP APIs allow a kernel to define a device-side data symbol which can be accessed on + * the host side. The symbol can be in __constant or device space. + * Note that the symbol name needs to be encased in the HIP_SYMBOL macro.
+ * This also applies to hipMemcpyFromSymbol, hipGetSymbolAddress, and hipGetSymbolSize. + * For detailed usage, see the + * memcpyToSymbol + * example in the HIP Porting Guide. + * + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from start of symbol + * @param[in] kind type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); + +/** + * @brief Copies data to the given symbol on the device asynchronously. + * + * @param[out] symbol pointer to the device symbol + * @param[in] src pointer to the source address + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from start of symbol + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + +/** + * @brief Copies data from the given symbol on the device. + * + * @param[out] dst Returns pointer to destination memory address + * @param[in] symbol Pointer to the symbol address on the device + * @param[in] sizeBytes Size in bytes to copy + * @param[in] offset Offset in bytes from the start of symbol + * @param[in] kind Type of memory transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); + +/** + * @brief Copies data from the given symbol on the device asynchronously.
+ * + * @param[out] dst Returns pointer to destination memory address + * @param[in] symbol pointer to the symbol address on the device + * @param[in] sizeBytes size in bytes to copy + * @param[in] offset offset in bytes from the start of symbol + * @param[in] kind type of memory transfer + * @param[in] stream stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data from src to dst asynchronously. + * + * The copy is always performed by the device associated with the specified stream. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst + * pointers (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and + * src/dest as the peerDevice argument. If enabling device peer access is not done, the memory copy + * will still work, but will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.
+ * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] kind Type of memory transfer + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, + * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dst Data being filled + * @param[in] value Value to be set + * @param[in] sizeBytes Data size in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. 
If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dest Data ptr to be filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dest with the constant integer + * value for specified number of times. 
+ * + * @param[out] dest Data being filled + * @param[in] value Constant value to be set + * @param[in] count Number of values to be set + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant + * byte value value. + * + * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] sizeBytes Size in bytes to set + * @param[in] stream Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dev with the constant integer + * value for specified number of times. + * + * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] count Number of values to be set + * @param[in] stream Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dst with the constant value. 
+ * + * @param[out] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Constant value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); +/** + * @brief Fills asynchronously the memory area pointed to by dst with the constant value. + * + * @param[in] dst Pointer to 2D device memory + * @param[in] pitch Pitch size in bytes of 2D device memory, unused if height equals 1 + * @param[in] value Value to set for each byte of specified memory + * @param[in] width Width size in bytes in 2D memory + * @param[in] height Height size in bytes in 2D memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, + hipStream_t stream __dparm(0)); +/** + * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent); +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. 
+ * + * @param[in] pitchedDevPtr Pointer to pitched device memory + * @param[in] value Value to set for each byte of specified memory + * @param[in] extent Size parameters for width field in bytes in device memory + * @param[in] stream Stream identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 8-bit values synchronously to the specified char value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 8-bit values asynchronously to the specified char value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D8Async(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Fills 2D memory range of 'width' 16-bit values synchronously to the specified short + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row.
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 16-bit values asynchronously to the specified short + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D16Async(hipDeviceptr_t dst, size_t dstPitch, unsigned short value, + size_t width, size_t height, hipStream_t stream __dparm(0)); +/** + * @brief Fills 2D memory range of 'width' 32-bit values synchronously to the specified int value. + * Height specifies number of rows to set and dstPitch specifies the number of bytes between each + * row. + * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, size_t width, + size_t height); +/** + * @brief Fills 2D memory range of 'width' 32-bit values asynchronously to the specified int + * value. Height specifies number of rows to set and dstPitch specifies the number of bytes + * between each row.
+ * @param[in] dst Pointer to device memory + * @param[in] dstPitch Pitch of dst device pointer + * @param[in] value value to set + * @param[in] width Width of row + * @param[in] height Number of rows + * @param[in] stream Stream Identifier + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, + size_t width, size_t height, hipStream_t stream __dparm(0)); + +/** + * @brief Query memory info. + * + * On ROCM, this function gets the actual free memory left on the current device, so supports + * the cases while running multi-workload (such as multiple processes, multiple threads, and + * multiple GPUs). + * + * @warning On Windows, the free memory only accounts for memory allocated by this process and may + * be optimistic. + * + * @param[out] free Returns free memory on the current device in bytes + * @param[out] total Returns total allocatable memory on the current device in bytes + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + **/ +hipError_t hipMemGetInfo(size_t* free, size_t* total); + +/** + * @brief Get allocated memory size via memory pointer. + * + * This function gets the allocated shared virtual memory size from memory pointer. + * + * @param[in] ptr Pointer to allocated memory + * @param[out] size Returns the allocated memory size in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + **/ +hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); +/** + * @brief Allocate an array on the device. 
+ * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] width Requested array allocation width + * @param[in] height Requested array allocation height + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMallocArray(hipArray_t* array, const hipChannelFormatDesc* desc, size_t width, + size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); +/** + * @brief Create an array memory pointer on the device. + * + * @param[out] pHandle Pointer to the array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayCreate(hipArray_t* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); +/** + * @brief Destroy an array memory pointer on the device. + * + * @param[in] array Pointer to the array memory + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArrayDestroy(hipArray_t array); +/** + * @brief Create a 3D array memory pointer on the device. + * + * @param[out] array Pointer to the 3D array memory + * @param[in] pAllocateArray Requested array descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocArray, hipArrayDestroy, hipFreeArray + */ +hipError_t hipArray3DCreate(hipArray_t* array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray); +/** + * @brief Create a 3D memory pointer on the device.
+ * + * @param[out] pitchedDevPtr Pointer to the 3D memory + * @param[in] extent Requested extent + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @see hipMallocPitch, hipMemGetInfo, hipFree + */ +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); +/** + * @brief Frees an array on the device. + * + * @param[in] array Pointer to array to free + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree + */ +hipError_t hipFreeArray(hipArray_t array); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @returns #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMalloc3DArray(hipArray_t* array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +/** + * @brief Gets info about the specified array + * + * @param[out] desc - Returned array type + * @param[out] extent - Returned array shape. 
2D arrays will have depth of zero + * @param[out] flags - Returned array flags + * @param[in] array - The HIP array to get info for + * + * @returns #hipSuccess, #hipErrorInvalidValue #hipErrorInvalidHandle + * + * @see hipArrayGetDescriptor, hipArray3DGetDescriptor + */ +hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, + hipArray_t array); +/** + * @brief Gets a 1D or 2D array descriptor + * + * @param[out] pArrayDescriptor - Returned array descriptor + * @param[in] array - Array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue #hipErrorInvalidHandle + * + * @see hipArray3DCreate, hipArray3DGetDescriptor, hipArrayCreate, hipArrayDestroy, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D, hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Gets a 3D array descriptor + * + * @param[out] pArrayDescriptor - Returned 3D array descriptor + * @param[in] array - 3D array to get descriptor of + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue #hipErrorInvalidHandle, #hipErrorContextIsDestroyed + * + * @see hipArray3DCreate, hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, + * hipMemAllocHost, hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, + * hipMemcpy3D, 
hipMemcpy3DAsync, hipMemcpyAtoA, hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, + * hipMemcpyDtoA, hipMemcpyDtoD, hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, + * hipMemcpyHtoA, hipMemcpyHtoAAsync, hipMemcpyHtoD, hipMemcpyHtoDAsync, hipMemFree, + * hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, hipMemHostAlloc, + * hipMemHostGetDevicePointer, hipMemsetD8, hipMemsetD16, hipMemsetD32, hipArrayGetInfo + */ +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, hipArray_t array); +/** + * @brief Copies data between host and device. + * + * hipMemcpy2D supports memory matrix copy from the pointed area src to the pointed area dst. + * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * Device to Device copies don't need to wait for host synchronization. + * The copy is executed on the default null stream. The src and dst must not overlap. + * dpitch and spitch are the widths in bytes in memory matrix, width cannot exceed dpitch or + * spitch. + * + * For hipMemcpy2D, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. For optimal peer-to-peer copies, the copy device + * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy + * agent as the current device and src/dst as the peerDevice argument). If this is not done, the + * hipMemcpy2D will still work, but will perform the copy using a staging buffer on the host. + * + * @warning Calling hipMemcpy2D with dst and src pointers that do not match the hipMemcpyKind + * results in undefined behavior.
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width size in bytes of matrix transfer (columns) + * @param[in] height Height size in bytes of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync + */ +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device asynchronously. + * + * hipMemcpy2DAsync supports memory matrix copy from the pointed area src to the pointed area dst. 
+ * The copy direction is defined by kind which must be one of #hipMemcpyHostToDevice, + * #hipMemcpyDeviceToHost, #hipMemcpyDeviceToDevice or #hipMemcpyDefault. + * dpitch and spitch are the widths in bytes of the memory matrices corresponding to dst and src. + * width cannot exceed dpitch or spitch. + * + * The copy is always performed by the device associated with the specified stream. + * The API is asynchronous with respect to the host, so the call may return before the copy is + * complete. The copy can optionally be executed in a specific stream by passing a non-zero stream + * argument; for HostToDevice or DeviceToHost copies, the copy can overlap with operations + * in other streams. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is + * attached to the device where the src data is physically located. + * + * For optimal peer-to-peer copies, the copy device must be able to access the src and dst pointers + * (by calling hipDeviceEnablePeerAccess) with copy agent as the current device and src/dst as the + * peerDevice argument. If enabling device peer access is not done, the API will still work, but + * will perform the copy using a staging buffer on the host. + * + * @note If host or dst are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously.
+ * + * @param[in] dst Pointer to destination memory address + * @param[in] dpitch Pitch size in bytes of destination memory + * @param[in] src Pointer to source memory address + * @param[in] spitch Pitch size in bytes of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffsetDst Destination starting X offset + * @param[in] hOffsetDst Destination starting Y offset + * @param[in] src Source memory address + * @param[in] wOffsetSrc Source starting X offset + * @param[in] hOffsetSrc Source starting Y offset (columns in bytes) + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, + size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination 
memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] count size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device [Deprecated] + * + * @ingroup MemoryD + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] count Size in bytes to copy + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + * @warning This API is deprecated. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. 
+ * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] srcOffset Offset in bytes of source array + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyAtoH(void* dst, hipArray_t srcArray, size_t srcOffset, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] dstArray Destination memory address + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] count Size of memory copy in bytes + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyHtoA(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] p 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); +/** + * @brief Copies data between host and device asynchronously. 
+ * + * @param[in] p 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] pCopy 3D memory copy parameters + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); +/** + * @brief Get information on memory allocations. 
+ * + * @param [out] pbase - BAse pointer address + * @param [out] psize - Size of allocation + * @param [in] dptr- Device Pointer + * + * @returns #hipSuccess, #hipErrorNotFound + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); + +/** + * @brief Perform Batch of 1D copies + * + * @param [in] dsts - Array of destination pointers + * @param [in] srcs - Array of source pointers. + * @param [in] sizes - Array of sizes for memcpy operations + * @param [in] count - Size of dsts, srcs and sizes arrays + * @param [in] attrs - Array of memcpy attributes (not supported) + * @param [in] attrsIdxs - Array of indices to map attrs to copies (not supported) + * @param [in] numAttrs - Size of attrs and attrsIdxs arrays (not supported) + * @param [in] failIdx - Pointer to a location to return failure index inside the batch + * @param [in] stream - stream used to enqueue operations in. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, size_t* attrsIdxs, size_t numAttrs, + size_t* failIdx, hipStream_t stream __dparm(0)); + +/** + * @brief Perform Batch of 3D copies + * + * @param [in] numOps - Total number of memcpy operations. + * @param [in] opList - Array of size numOps containing the actual memcpy operations. + * @param [in] failIdx - Pointer to a location to return the index of the copy where a failure + * - was encountered. + * @param [in] flags - Flags for future use, must be zero now. + * @param [in] stream - The stream to enqueue the operations in. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp* opList, size_t* failIdx, + unsigned long long flags, hipStream_t stream __dparm(0)); + +/** + * @brief Performs 3D memory copies between devices + * This API is asynchronous with respect to host + * + * @param [in] p - Parameters for memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p); + +/** + * @brief Performs 3D memory copies between devices asynchronously + * + * @param [in] p - Parameters for memory copy + * @param [in] stream - Stream to enqueue operation in. + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipErrorInvalidDevice + */ +hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream __dparm(0)); +// doxygen end Memory +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup PeerToPeer PeerToPeer Device Memory Access + * @{ + * @ingroup API + * This section describes the PeerToPeer device memory access functions of HIP runtime API. + */ +/** + * @brief Determines if a device can access a peer device's memory. + * + * @param [out] canAccessPeer - Returns the peer access capability (0 or 1) + * @param [in] deviceId - The device accessing the peer device memory. + * @param [in] peerDeviceId - Peer device where memory is physically located + * + * The value of @p canAccessPeer, + * + * Returns "1" if the specified @p deviceId is capable of directly accessing memory physically + * located on @p peerDeviceId, + * + * Returns "0" if the specified @p deviceId is not capable of directly accessing memory physically + * located on @p peerDeviceId. 
+ * + * Returns "0" if @p deviceId == @p peerDeviceId, both are valid devices, + * however, a device is not a peer of itself. + * + * Returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices + * + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enables direct access to memory allocations on a peer device. + * + * When this API is successful, all memory allocations on peer device will be mapped into the + * address space of the current device. In addition, any future memory allocation on the + * peer device will remain accessible from the current device, until the access is disabled using + * hipDeviceDisablePeerAccess or device is reset using hipDeviceReset. + * + * @param [in] peerDeviceId - Peer device to enable direct access to from the current device + * @param [in] flags - Reserved for future use, must be zero + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); +/** + * @brief Disables direct access to memory allocations on a peer device. + * + * If direct access to memory allocations on peer device has not been enabled yet from the current + * device, it returns #hipErrorPeerAccessNotEnabled. + * + * @param [in] peerDeviceId Peer device to disable direct access to + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + */ +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); + +/** + * @brief Copies memory between two peer accessible devices. 
+ * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDeviceId - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); +/** + * @brief Copies memory between two peer accessible devices asynchronously. + * + * @param [out] dst - Destination device pointer + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDevice - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * @param [in] stream - Stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream __dparm(0)); + +// doxygen end PeerToPeer +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Context Context Management [Deprecated] + * @{ + * This section describes the context management functions of HIP runtime API. + * + * @warning + * + * On the AMD platform, context management APIs are deprecated as there are better alternate + * interfaces, such as using hipSetDevice and stream APIs to achieve the required functionality. + * + * On the NVIDIA platform, CUDA supports the driver API that defines "Context" and "Devices" as + * separate entities. Each context contains a single device, which can theoretically have multiple + * contexts. HIP initially added limited support for these APIs to facilitate easy porting from + * existing driver codes. 
+ * + * These APIs are only for equivalent driver APIs on the NVIDIA platform. + * + */ + +/** + * @brief Create a context and set it as current/default context + * + * @param [out] ctx Context to create + * @param [in] flags Context creation flags + * @param [in] device device handle + * + * @returns #hipSuccess + * + * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, + * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); +/** + * @brief Destroy a HIP context [Deprecated] + * + * @param [in] ctx Context to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDestroy(hipCtx_t ctx); +/** + * @brief Pop the current/default context and return the popped context [Deprecated] + * + * @param [out] ctx The current context to pop + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPopCurrent(hipCtx_t* ctx); +/** + * @brief Push the context to be set as current/ default context [Deprecated] + * + * @param [in] ctx The current context to push + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxPushCurrent(hipCtx_t ctx); +/** + * @brief Set the passed context as current/default [Deprecated] + * + * @param [in] ctx The context to set as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCurrent(hipCtx_t ctx); +/** + * @brief Get the handle of the current/ default context [Deprecated] + * + * @param [out] ctx The context to get as current + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. 
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCurrent(hipCtx_t* ctx); +/** + * @brief Get the handle of the device associated with current/default context [Deprecated] + * + * @param [out] device The device from the current context + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetDevice(hipDevice_t* device); +/** + * @brief Returns the approximate HIP API version [Deprecated] + * + * @param [in] ctx Context to check + * @param [out] apiVersion API version to get + * + * @returns #hipSuccess + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. + * This function always sets *apiVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the api revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, unsigned int* apiVersion); +/** + * @brief Get Cache configuration for a specific function [Deprecated] + * + * @param [out] cacheConfig Cache configuration + * + * @returns #hipSuccess + * + * @warning AMD devices and some NVIDIA GPUs do not support reconfigurable cache. This hint is + * ignored on those architectures.
+ * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Set L1/Shared cache partition [Deprecated] + * + * @param [in] cacheConfig Cache configuration to set + * + * @returns #hipSuccess + * + * @warning AMD devices and some NVIDIA GPUs do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxGetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Set Shared memory bank configuration [Deprecated] + * + * @param [in] config Shared memory configuration to set + * + * @returns #hipSuccess + * + * @warning AMD devices and some NVIDIA GPUs do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief Get Shared memory bank configuration [Deprecated] + * + * @param [out] pConfig Pointer to the shared memory configuration + * + * @returns #hipSuccess + * + * @warning AMD devices and some NVIDIA GPUs do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Blocks until the default context has completed all preceding requested tasks [Deprecated] + * + * @returns #hipSuccess + * + * @warning This function waits for all streams on the default context to complete execution, and + * then returns. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxSynchronize(void); +/** + * @brief Return flags used for creating default context [Deprecated] + * + * @param [out] flags Pointer to the flags + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxGetFlags(unsigned int* flags); +/** + * @brief Enables direct access to memory allocations in a peer context [Deprecated] + * + * Memory which is already allocated on the peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerCtx Peer context + * @param [in] flags Flags, must be set to 0 + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * #hipErrorPeerAccessAlreadyEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); +/** + * @brief Disable direct access from current context's virtual address space to memory allocations + * physically located on a peer context. Disables direct access to memory allocations in a peer + * context and unregisters any registered allocations [Deprecated] + * + * Returns #hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device.
+ * + * @param [in] peerCtx Peer context to be disabled + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + * + * @warning This API is deprecated on the AMD platform, only for equivalent cuCtx driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); + +/** + * @brief Get the state of the primary context [Deprecated] + * + * @param [in] dev Device to get primary context flags for + * @param [out] flags Pointer to store flags + * @param [out] active Pointer to store context state; 0 = inactive, 1 = active + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); +/** + * @brief Release the primary context on the GPU [Deprecated] + * + * @param [in] dev Device whose primary context is released + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning This function returns #hipSuccess though it doesn't release the primaryCtx by design on + * HIP/HIP-CLANG path. + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); +/** + * @brief Retain the primary context on the GPU [Deprecated] + * + * @param [out] pctx Returned context handle of the new context + * @param [in] dev Device whose primary context is retained + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); +/** + * @brief Resets the primary context on the GPU [Deprecated] + * + * @param [in] dev Device whose primary context is reset + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform. + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); +/** + * @brief Set flags for the primary context [Deprecated] + * + * @param [in] dev Device for which the primary context flags are set + * @param [in] flags New flags for the device + * + * @returns #hipSuccess, #hipErrorContextAlreadyInUse + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * + * @warning This API is deprecated on the AMD platform, only for equivalent driver API on the + * NVIDIA platform.
+ */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); +// doxygen end Context Management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * + * @defgroup Module Module Management + * @{ + * @ingroup API + * This section describes the module management functions of HIP runtime API. + * + */ +/** + * @brief Loads fatbin object + * + * @param [in] fatbin fatbin to be loaded as a module + * @param [out] module Module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoadFatBinary(hipModule_t* module, const void* fatbin); +/** + * @brief Loads code object from file into a module in the current context. + * + * @param [in] fname Filename of code object to load + + * @param [out] module Module + * + * @warning File/memory resources allocated in this function are released only in hipModuleUnload. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorFileNotFound, + * #hipErrorOutOfMemory, #hipErrorSharedObjectInitFailed, #hipErrorNotInitialized + * + */ +hipError_t hipModuleLoad(hipModule_t* module, const char* fname); +/** + * @brief Frees the module + * + * @param [in] module Module to free + * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle + * + * The module is freed, and the code objects associated with it are destroyed.
+ */ +hipError_t hipModuleUnload(hipModule_t module); +/** + * @brief Function with kname will be extracted if present in module + * + * @param [in] module Module to get function from + * @param [in] kname Pointer to the name of function + * @param [out] function Pointer to function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound + */ +hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); + +/** + * @brief Returns the number of functions within a module. + * + * @param [in] mod Module to get function count from + * @param [out] count function count from module + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext, #hipErrorNotInitialized, + * #hipErrorNotFound + */ +hipError_t hipModuleGetFunctionCount(unsigned int* count, hipModule_t mod); + +/** + * @brief Load hip Library from in-memory object + * + * @param [out] library Output Library + * @param [in] code In memory object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * @param [in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption* jitOptions, + void** jitOptionsValues, unsigned int numJitOptions, + hipLibraryOption* libraryOptions, void** libraryOptionValues, + unsigned int numLibraryOptions); + +/** + * @brief Load hip Library from file + * + * @param [out] library Output Library + * @param [in] fileName file which contains code object + * @param [in] jitOptions JIT options, CUDA only + * @param [in] jitOptionsValues JIT options values, CUDA only + * @param [in] numJitOptions Number of JIT options + * @param
[in] libraryOptions Library options + * @param [in] libraryOptionValues Library options values + * @param [in] numLibraryOptions Number of library options + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName, + hipJitOption* jitOptions, void** jitOptionsValues, + unsigned int numJitOptions, hipLibraryOption* libraryOptions, + void** libraryOptionValues, unsigned int numLibraryOptions); + +/** + * @brief Unload HIP Library + * + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryUnload(hipLibrary_t library); + +/** + * @brief Get Kernel object from library + * + * @param [out] pKernel Output kernel object + * @param [in] library Input hip library + * @param [in] name kernel name to be searched for + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name); + +/** + * @brief Get Kernel count in library + * + * @param [out] count Count of kernels in library + * @param [in] library Input created hip library + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipLibraryGetKernelCount(unsigned int *count, hipLibrary_t library); + +/** + * @brief Retrieve kernel handles within a library + * + * @param [out] kernels Buffer for kernel handles + * @param [in] numKernels Maximum number of kernel handles to return to buffer + * @param [in] library Library handle to query from + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipLibraryEnumerateKernels(hipKernel_t* kernels, unsigned int numKernels, + hipLibrary_t library); + +/** + * @brief Returns a Library Handle + * + * @param [out] library Returned Library handle + * @param [in] kernel Kernel to retrieve library Handle + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetLibrary(hipLibrary_t* library, hipKernel_t kernel); + +/** + * @brief Returns
a Kernel Name + * + * @param [out] name Returned Kernel Name + * @param [in] kernel Kernel handle to retrieve name + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetName(const char** name, hipKernel_t kernel); + +/** + * @brief Returns the offset and size of a kernel parameter + * + * @param [in] kernel Kernel handle to retrieve parameter info + * @param [in] paramIndex Index of the parameter + * @param [out] paramOffset returns the offset of the parameter + * @param [out] paramSize Optionally returns the size of the parameter + * + * @return #hipSuccess, #hipErrorInvalidValue +*/ +hipError_t hipKernelGetParamInfo(hipKernel_t kernel, size_t paramIndex, size_t* paramOffset, + size_t* paramSize); + +/** + * @brief Find out attributes for a given function. + * @ingroup Execution + * @param [out] attr Attributes of function + * @param [in] func Pointer to the function handle + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func); +/** + * @brief Find out a specific attribute for a given function. + * @ingroup Execution + * @param [out] value Pointer to the value + * @param [in] attrib Attributes of the given function + * @param [in] hfunc Function to get attributes from + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc); +/** + * @brief Gets pointer to device entry function that matches entry function symbolPtr.
+ * + * @param [out] functionPtr Device entry function + * @param [in] symbolPtr Pointer to device entry function to search for + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGetFuncBySymbol(hipFunction_t* functionPtr, const void* symbolPtr); +/** + * @brief Gets function pointer of a requested HIP API + * + * @param [in] symbol The API base name + * @param [out] funcPtr Pointer to the requested function + * @param [in] flags Flags for the search + * @param [out] driverStatus Optional returned status of the search + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, + hipDriverEntryPointQueryResult* driverStatus); +/** + * @brief Returns the handle of the texture reference with the name from the module. + * + * @param [in] hmod Module + * @param [in] name Pointer of name of texture reference + * @param [out] texRef Pointer of texture reference + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound, #hipErrorInvalidValue + */ +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); +/** + * @brief Builds module from code object data which resides in host memory. + * + * The "image" is a pointer to the location of code object data. This data can be either + * a single code object or a fat binary (fatbin), which serves as the entry point for loading and + * launching device-specific kernel executions. + * + * By default, the following command generates a fatbin: + * + * "amdclang++ -O3 -c --offload-device-only --offload-arch=<GPU_ARCH> <input_file> -o <output_file>" + * + * For more details, refer to: + * + * Kernel Compilation in the HIP kernel language C++ support, or + * HIP runtime compilation (HIP RTC).
+ * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorOutOfMemory + */ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image); +/** + * @brief Builds module from code object which resides in host memory. Image is pointer to that + * location. Options are not used. hipModuleLoadData is called. + * + * @param [in] image The pointer to the location of data + * @param [out] module Returned module + * @param [in] numOptions Number of options + * @param [in] options Options for JIT + * @param [in] optionValues Option values for JIT + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorOutOfMemory + */ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionValues); +/** + * @brief Adds bitcode data to be linked with options. + * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] data Input data which is null terminated + * @param [in] size Size of the input data + * @param [in] name Optional name for this input + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * If adding the file fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddData(hipLinkState_t state, hipJitInputType type, void* data, size_t size, + const char* name, unsigned int numOptions, hipJitOption* options, + void** optionValues); + +/** + * @brief Adds a file with bitcode to be linked with options.
+ * @param [in] state hip link state + * @param [in] type Type of the input data or bitcode + * @param [in] path Path to the input file where bitcode is present + * @param [in] numOptions Size of the options + * @param [in] options Array of options applied to this input + * @param [in] optionValues Array of option values cast to void* + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If adding the file fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ +hipError_t hipLinkAddFile(hipLinkState_t state, hipJitInputType type, const char* path, + unsigned int numOptions, hipJitOption* options, void** optionValues); + +/** + * @brief Completes the linking of the given program. + * @param [in] state hip link state + * @param [out] hipBinOut Upon success, points to the output binary + * @param [out] sizeOut Size of the binary is stored (optional) + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * If adding the data fails, it will + * @return #hipErrorInvalidConfiguration + * + * @see hipError_t + */ + +hipError_t hipLinkComplete(hipLinkState_t state, void** hipBinOut, size_t* sizeOut); + +/** + * @brief Creates a linker instance with options. + * @param [in] numOptions Number of options + * @param [in] options Array of options + * @param [in] optionValues Array of option values cast to void* + * @param [out] stateOut hip link state created upon success + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidConfiguration + * + * @see hipSuccess + */ +hipError_t hipLinkCreate(unsigned int numOptions, hipJitOption* options, void** optionValues, + hipLinkState_t* stateOut); +/** + * @brief Deletes the linker instance.
+ * @param [in] state link state instance + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipSuccess + */ +hipError_t hipLinkDestroy(hipLinkState_t state); + +/** + * @brief Launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams or extra + * @ingroup Execution + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items + * @param [in] blockDimY Y block dimension specified in work-items + * @param [in] blockDimZ Z block dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * @param [in] kernelParams Kernel parameters to launch + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * All passed arguments must be naturally aligned according to their type. The memory address of + * each argument should be a multiple of its size in bytes. Please refer to + * hip_porting_driver_api.md for sample usage. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. So gridDim.x * blockDim.x, gridDim.y * blockDim.y + * and gridDim.z * blockDim.z are always less than 2^32.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra); +/** \addtogroup ModuleCooperativeG Cooperative groups kernel launch of Module management. + * \ingroup Module + * @{ */ +/** + * @brief Launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimension specified in work-items. + * @param [in] blockDimY Y block dimension specified in work-items. + * @param [in] blockDimZ Z block dimension specified in work-items. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, + * in which case the default stream is used with associated synchronization rules. + * @param [in] kernelParams A list of kernel arguments. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$.
+ * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorLaunchFailure, #hipErrorLaunchOutOfResources, + * #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorDeinitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidHandle, #hipErrorInvalidImage, #hipErrorInvalidValue, + * #hipErrorInvalidConfiguration, #hipErrorInvalidResourceHandle, #hipErrorLaunchFailure, + * #hipErrorLaunchOutOfResources, #hipErrorLaunchTimeOut, #hipErrorCooperativeLaunchTooLarge, + * #hipErrorSharedObjectInitFailed + */ +hipError_t hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags); +/** + * @brief Launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelParams, where thread blocks can cooperate and synchronize as they execute. + * + * @param [in] f - Kernel to launch. + * @param [in] gridDim - Grid dimensions specified as multiple of blockDim. + * @param [in] blockDimX - Block dimensions specified in work-items + * @param [in] kernelParams - Pointer to the arguments passed to the kernel.
If the kernel has multiple + * parameters, 'kernelParams' should be an array of pointers, each pointing to the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size \f$ gridDim \cdot blockDim \geq 2^{32} \f$. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); + +// Doxygen end group ModuleCooperativeG +/** @} */ + +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior.
+ * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + */ +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags); +/** + * @brief Launches a HIP kernel using a generic function pointer and the specified configuration. + * @ingroup Execution + * + * This function is equivalent to hipLaunchKernelEx but accepts the kernel as a generic function + * pointer. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] fPtr Pointer to the device kernel function. + * @param [in] args Array of pointers to the kernel arguments. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args); +/** + * @brief Launches a HIP kernel using the driver API with the specified configuration. + * @ingroup Execution + * + * This function dispatches the device kernel represented by a HIP function object. + * It passes both the kernel parameters and any extra configuration arguments to the kernel launch. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] f HIP function object representing the device kernel to be launched. + * @param [in] params Array of pointers to the kernel parameters. + * @param [in] extra Array of pointers for additional launch parameters or extra configuration + * data. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, + void** extra); +/** + * @brief Returns a handle for the address range requested. + * + * This function returns a handle to a device pointer created using either the hipMalloc set of APIs + * or through hipMemAddressReserve (as long as the ptr is mapped).
+ * + * @param [out] handle Ptr to the handle where the fd or other types will be returned. + * @param [in] dptr Device ptr for which we get the handle. + * @param [in] size Size of the address range. + * @param [in] handleType Type of the handle requested for the address range. + * @param [in] flags Any flags set regarding the handle requested. + * + * @returns #hipSuccess if the handle is retrieved successfully, otherwise an appropriate error code. + */ +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags); +// doxygen end Module +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Occupancy Occupancy + * @{ + * This section describes the occupancy functions of HIP runtime API. + * + */ +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +// TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function (hipFunction) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f, + int blockSize, + size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f, + int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function.
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calculated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, + unsigned int flags __dparm(hipOccupancyDefault)); +/** + * @brief determine the grid and block sizes to achieve maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calculated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f, + size_t dynSharedMemPerBlk, int blockSizeLimit); +/** + * @brief Returns dynamic shared memory available per block when launching numBlocks blocks on SM. + * + * @ingroup Occupancy + * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory + * to allow numBlocks blocks per SM. + * + * @param [out] dynamicSmemSize Returned maximum dynamic shared memory. + * @param [in] f Kernel function for which occupancy is calculated.
+ * @param [in] numBlocks Number of blocks to fit on SM + * @param [in] blockSize Size of the block + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue, + * #hipErrorUnknown + */ +hipError_t hipOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* f, + int numBlocks, int blockSize); +// doxygen end Occupancy +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Profiler Profiler Control [Deprecated] + * @{ + * This section describes the profiler control functions of HIP runtime API. + * + * @warning The cudaProfilerInitialize API format for "configFile" is not supported. + * + */ +// TODO - expand descriptions: +/** + * @brief Start recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStart API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStart(); +/** + * @brief Stop recording of profiling information [Deprecated] + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @returns #hipErrorNotSupported + * @warning hipProfilerStop API is deprecated, use roctracer/rocTX instead. + */ +HIP_DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStop(); +// doxygen end profiler +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Clang Launch API to support the triple-chevron syntax + * @{ + * This section describes the API to support the triple-chevron syntax.
+ */ +/** + * @brief Configure a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Set a kernel argument. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + * @param [in] arg Pointer the argument in host memory. + * @param [in] size Size of the argument. + * @param [in] offset Offset of the argument on the argument stack. + * + */ +hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); +/** + * @brief Launch a kernel. + * + * @param [in] func Kernel to launch. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchByPtr(const void* func); +/** + * @brief Push configuration of a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. 
+ * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Pop configuration of a kernel launch. + * + * @param [out] gridDim grid dimension specified as multiple of blockDim. + * @param [out] blockDim block dimensions specified in work-items + * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * Please note, HIP does not support kernel launch with total work items defined in dimension with + * size gridDim x blockDim >= 2^32. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue + * + */ +hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream); +/** + * @brief C compliant kernel launch API + * + * @param [in] function_address - Kernel stub function pointer. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be array of pointers, each points the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. 
+ * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes __dparm(0), + hipStream_t stream __dparm(0)); + +/** + * @brief Enqueues a host function call in a stream. + * + * @param [in] stream - The stream to enqueue work in. + * @param [in] fn - The function to call once enqueued preceding operations are complete. + * @param [in] userData - User-specified data to be passed to the function. + * + * @returns #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorInvalidValue, + * #hipErrorNotSupported + * + * The host function to call in this API will be executed after the preceding operations in + * the stream are complete. The function is a blocking operation that blocks operations in the + * stream that follow it, until the function is returned. + * Event synchronization and internal callback functions make sure enqueued operations will + * execute in order, in the stream. + * + * The host function must not make any HIP API calls. The host function is non-reentrant. It must + * not perform synchronization with any operation that may depend on other processing execution + * but is not enqueued to run earlier in the stream. + * + * Host functions that are enqueued respectively in different non-blocking streams can run + * concurrently. + * + * @warning This API is marked as beta, meaning, while this is feature complete, + * it is still open to changes and may have outstanding issues. + */ +hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData); + +/** + * @brief Copies memory for 2D arrays.
+ * + * @param pCopy - Parameters for the memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy); +// TODO: Move this to hip_ext.h +/** + * @brief Launches kernel from the pointer address, with arguments and shared memory on stream. + * + * @param [in] function_address - Pointer to the Kernel to launch. + * @param [in] numBlocks - Number of blocks. + * @param [in] dimBlocks - Dimension of a block. + * @param [in] args - Pointer of arguments passed to the kernel. If the kernel has multiple + * parameters, 'args' should be array of pointers, each points the corresponding argument. + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. + * May be 0, in which case the default stream is used with associated synchronization rules. + * @param [in] startEvent - If non-null, specified event will be updated to track the start time of + * the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent - If non-null, specified event will be updated to track the stop time of + * the kernel launch. The event must be created before calling this API. + * @param [in] flags - The value of hipExtAnyOrderLaunch, signifies if kernel can be + * launched in any order. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue. 
+ * + */ +hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); +// doxygen end Clang launch +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Texture Texture Management + * @{ + * This section describes the texture management functions of HIP runtime API. + */ + +/** + * @brief Creates a texture object. + * + * @param [out] pTexObject pointer to the texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported, #hipErrorOutOfMemory + * + * @note 3D linear filter isn't supported on GFX90A boards, on which the API @p + * hipCreateTextureObject will return hipErrorNotSupported. + * + */ +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const struct hipResourceViewDesc* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] textureObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +/** + * @brief Gets the channel descriptor of an array. + * + * @param [out] desc pointer to channel format descriptor + * @param [in] array memory array on the device + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); + +/** + * @brief Gets resource descriptor for the texture object.
+ * + * @param [out] pResDesc pointer to resource descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets resource view descriptor for the texture object. + * + * @param [out] pResViewDesc pointer to resource view descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectResourceViewDesc(struct hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); + +/** + * @brief Gets texture descriptor for the texture object. + * + * @param [out] pTexDesc pointer to texture descriptor + * @param [in] textureObject texture object + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); + +/** + * @brief Creates a texture object. + * + * @param [out] pTexObject pointer to texture object to create + * @param [in] pResDesc pointer to resource descriptor + * @param [in] pTexDesc pointer to texture descriptor + * @param [in] pResViewDesc pointer to resource view descriptor + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); + +/** + * @brief Destroys a texture object. + * + * @param [in] texObject texture object to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectDestroy(hipTextureObject_t texObject); + +/** + * @brief Gets resource descriptor of a texture object. 
+ * + * @param [out] pResDesc pointer to resource descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject); + +/** + * @brief Gets resource view descriptor of a texture object. + * + * @param [out] pResViewDesc pointer to resource view descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); + +/** + * @brief Gets texture descriptor of a texture object. + * + * @param [out] pTexDesc pointer to texture descriptor + * @param [in] texObject texture object + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + */ +hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject); + +/** + * @brief Allocate a mipmapped array on the device. + * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t* mipmappedArray, + const struct hipChannelFormatDesc* desc, struct hipExtent extent, + unsigned int numLevels, unsigned int flags __dparm(0)); + +/** + * @brief Frees a mipmapped array on the device. 
+ * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); + +/** + * @brief Gets a mipmap level of a HIP mipmapped array. + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array + * @param[in] level - Mipmap level + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipGetMipmappedArrayLevel(hipArray_t* levelArray, + hipMipmappedArray_const_t mipmappedArray, unsigned int level); + +/** + * @brief Create a mipmapped array. + * + * @param [out] pHandle pointer to mipmapped array + * @param [in] pMipmappedArrayDesc mipmapped array descriptor + * @param [in] numMipmapLevels number of mipmap levels + * + * @returns #hipSuccess, #hipErrorNotSupported, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels); + +/** + * @brief Destroy a mipmapped array. + * + * @param [in] hMipmappedArray pointer to mipmapped array to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray); + +/** + * @brief Get a mipmapped array on a mipmapped level.
+ * + * @param [out] pLevelArray Pointer to the returned array for the requested mipmap level + * @param [in] hMipMappedArray Mipmapped array + * @param [in] level Mipmap level + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, hipMipmappedArray_t hMipMappedArray, + unsigned int level); + +/** + * + * @addtogroup TextureD Texture Management [Deprecated] + * @{ + * @ingroup Texture + * This section describes the deprecated texture management functions of HIP runtime API. + */ + +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @param [in] tex pointer to the texture reference to bind + * @param [in] mipmappedArray memory mipmapped array on the device + * @param [in] desc pointer to the channel format + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +/** + * @brief Gets the texture reference related with the symbol [Deprecated] + * + * @param [out] texref texture reference + * @param [in] symbol pointer to the symbol related with the texture for the reference + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); + +/** + * @brief Gets the border color used by a texture reference [Deprecated] + * + * @param [out] pBorderColor Returned Type and Value of RGBA color. + * @param [in] texRef Texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetBorderColor(float* pBorderColor, const textureReference* texRef); + +/** + * @brief Gets the array bound to a texture reference [Deprecated] + + * + * @param [out] pArray Returned array. + * @param [in] texRef texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetArray(hipArray_t* pArray, const textureReference* texRef); + +/** + * @brief Sets address mode for a texture reference [Deprecated] + * + * @param [in] texRef texture reference. + * @param [in] dim Dimension of the texture. + * @param [in] am Value of the texture address mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddressMode(textureReference* texRef, int dim, + enum hipTextureAddressMode am); +/** + * @brief Binds an array as a texture reference [Deprecated] + * + * @param [in] tex Pointer to texture reference. + * @param [in] array Array to bind. + * @param [in] flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags); +/** + * @brief Set filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference. + * @param [in] fm Value of texture filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Set flags for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference.
+ * @param [in] Flags Value of flags. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFlags(textureReference* texRef, unsigned int Flags); +/** + * @brief Set format for a texture reference [Deprecated] + * + * @param [in] texRef Pointer to texture reference. + * @param [in] fmt Value of format. + * @param [in] NumPackedComponents Number of components per array. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetFormat(textureReference* texRef, hipArray_Format fmt, + int NumPackedComponents); +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] desc Pointer of channel format descriptor. + * @param [in] size Size of memory in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size __dparm(UINT_MAX)); +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] desc Pointer of channel format descriptor. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTexture2D(size_t* offset, const textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, + size_t pitch); +/** + * @brief Binds an array to a texture [Deprecated] + * + * @param [in] tex Pointer of texture reference. + * @param [in] array Array to bind. + * @param [in] desc Pointer of channel format descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipBindTextureToArray(const textureReference* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc); +/** + * @brief Get the offset of the alignment in a texture [Deprecated] + * + * @param [out] offset Returned offset in bytes. + * @param [in] texref Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref); +/** + * @brief Unbinds a texture [Deprecated] + * + * @param [in] tex Texture to unbind. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipUnbindTexture(const textureReference* tex); +/** + * @brief Gets the address for a texture reference [Deprecated] + * + * @param [out] dev_ptr Pointer of device address. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated.
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, const textureReference* texRef); +/** + * @brief Gets the address mode for a texture reference [Deprecated] + * + * @param [out] pam Pointer of address mode. + * @param [in] texRef Pointer of texture reference. + * @param [in] dim Dimension. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetAddressMode(enum hipTextureAddressMode* pam, const textureReference* texRef, + int dim); +/** + * @brief Gets filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of filter mode. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFilterMode(enum hipTextureFilterMode* pfm, const textureReference* texRef); +/** + * @brief Gets flags for a texture reference [Deprecated] + * + * @param [out] pFlags Pointer of flags. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFlags(unsigned int* pFlags, const textureReference* texRef); +/** + * @brief Gets texture format for a texture reference [Deprecated] + * + * @param [out] pFormat Pointer of the format. + * @param [out] pNumChannels Pointer of number of channels. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, int* pNumChannels, + const textureReference* texRef); +/** + * @brief Gets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [out] pmaxAnsio Pointer of the maximum anisotropy. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, const textureReference* texRef); +/** + * @brief Gets the mipmap filter mode for a texture reference [Deprecated] + * + * @param [out] pfm Pointer of the mipmap filter mode. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapFilterMode(enum hipTextureFilterMode* pfm, + const textureReference* texRef); +/** + * @brief Gets the mipmap level bias for a texture reference [Deprecated] + * + * @param [out] pbias Pointer of the mipmap level bias. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelBias(float* pbias, const textureReference* texRef); +/** + * @brief Gets the minimum and maximum mipmap level clamps for a texture reference [Deprecated] + * + * @param [out] pminMipmapLevelClamp Pointer of the minimum mipmap level clamp. + * @param [out] pmaxMipmapLevelClamp Pointer of the maximum mipmap level clamp. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, + const textureReference* texRef); +/** + * @brief Gets the mipmapped array bound to a texture reference [Deprecated] + * + * @param [out] pArray Pointer of the mipmapped array. + * @param [in] texRef Pointer of texture reference. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefGetMipMappedArray(hipMipmappedArray_t* pArray, const textureReference* texRef); +/** + * @brief Sets a bound address for a texture reference [Deprecated] + * + * @param [out] ByteOffset Pointer of the offset in bytes. + * @param [in] texRef Pointer of texture reference. + * @param [in] dptr Pointer of device address to bind. + * @param [in] bytes Size in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress(size_t* ByteOffset, textureReference* texRef, hipDeviceptr_t dptr, + size_t bytes); +/** + * @brief Binds an address as a 2D texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] desc Pointer of array descriptor. + * @param [in] dptr Pointer of device address to bind. + * @param [in] Pitch Pitch in bytes. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetAddress2D(textureReference* texRef, const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, size_t Pitch); +/** + * @brief Sets the maximum anisotropy for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] maxAniso Value of the maximum anisotropy.
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef, unsigned int maxAniso); +/** + * @brief Sets border color for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] pBorderColor Pointer of border color. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetBorderColor(textureReference* texRef, float* pBorderColor); +/** + * @brief Sets mipmap filter mode for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] fm Value of filter mode. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef, enum hipTextureFilterMode fm); +/** + * @brief Sets mipmap level bias for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] bias Value of mipmap bias. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef, float bias); +/** + * @brief Sets mipmap level clamp for a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference. + * @param [in] minMipMapLevelClamp Value of minimum mipmap level clamp. + * @param [in] maxMipMapLevelClamp Value of maximum mipmap level clamp. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * + * @warning This API is deprecated. 
+ * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef, float minMipMapLevelClamp, + float maxMipMapLevelClamp); +/** + * @brief Binds mipmapped array to a texture reference [Deprecated] + * + * @param [in] texRef Pointer of texture reference to bind. + * @param [in] mipmappedArray Pointer of mipmapped array to bind. + * @param [in] Flags Flags should be set as HIP_TRSA_OVERRIDE_FORMAT, as a valid value. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API is deprecated. + * + */ +HIP_DEPRECATED(HIP_DEPRECATED_MSG) +hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, + struct hipMipmappedArray* mipmappedArray, unsigned int Flags); + +// doxygen end deprecated texture management +/** + * @} + */ + +// doxygen end Texture management +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Runtime Runtime Compilation + * @{ + * This section describes the runtime compilation functions of HIP runtime API. + * + */ +// This group is for HIPrtc + +// doxygen end Runtime +/** + * @} + */ + +/** + * + * @defgroup Callback Callback Activity APIs + * @{ + * This section describes the callback/Activity of HIP runtime API. + */ +/** + * @brief Returns HIP API name by ID. + * + * @param [in] id ID of HIP API + * + * @returns Pointer to the name string of the HIP API. + * + */ +const char* hipApiName(uint32_t id); +/** + * @brief Returns kernel name reference by function name. + * + * @param [in] f Name of function + * + * @returns Pointer to the kernel name string. + * + */ +const char* hipKernelNameRef(const hipFunction_t f); +/** + * @brief Retrieves kernel for a given host pointer, unless stated otherwise. + * + * @param [in] hostFunction Pointer of host function.
+ * @param [in] stream Stream the kernel is executed on. + * + * @returns Pointer to the kernel name string. + * + */ +const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream); +/** + * @brief Returns device ID on the stream. + * + * @param [in] stream Stream of device executed on. + * + * @returns Device ID of the stream. + * + */ +int hipGetStreamDeviceId(hipStream_t stream); + +// doxygen end Callback +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Graph Graph Management + * @{ + * This section describes the graph management types & functions of HIP runtime API. + */ + +/** + * @brief Begins graph capture on a stream. + * + * @param [in] stream - Stream to initiate capture. + * @param [in] mode - Controls the interaction of this capture sequence with other API calls that + * are not safe. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode); + +/** +* @brief Begins graph capture on a stream to an existing graph. +* +* @param [in] stream - Stream to initiate capture. +* @param [in] graph - Graph to capture into. +* @param [in] dependencies - Dependencies of the first node captured in the stream. Can be NULL if +* numDependencies is 0. +* @param [in] dependencyData - Optional array of data associated with each dependency. +* @param [in] numDependencies - Number of dependencies. +* @param [in] mode - Controls the interaction of this capture sequence with other API calls that +are not safe. +* +* @returns #hipSuccess, #hipErrorInvalidValue +* +* @warning param "const hipGraphEdgeData* dependencyData" is currently not supported and has to be +passed as nullptr.
This API is marked as beta, meaning, while this is feature complete, it is still +open to changes and may have outstanding issues. +* +*/ +hipError_t hipStreamBeginCaptureToGraph(hipStream_t stream, hipGraph_t graph, + const hipGraphNode_t* dependencies, + const hipGraphEdgeData* dependencyData, + size_t numDependencies, hipStreamCaptureMode mode); + +/** + * @brief Ends capture on a stream, returning the captured graph. + * + * @param [in] stream - Stream to end capture. + * @param [out] pGraph - Captured graph. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph); + +/** + * @brief Get capture status of a stream. + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] pCaptureStatus - Returns current capture status. + * @param [out] pId - Unique capture ID. + * + * @returns #hipSuccess, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); + +/** + * @brief Get stream's capture state + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] captureStatus_out - Returns current capture status. + * @param [out] id_out - Unique capture ID. + * @param [out] graph_out - Returns the graph being captured into. + * @param [out] dependencies_out - Pointer to an array of nodes representing the graphs + * dependencies. + * @param [out] numDependencies_out - Returns size of the array returned in dependencies_out. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamGetCaptureInfo_v2(hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out __dparm(0), + hipGraph_t* graph_out __dparm(0), + const hipGraphNode_t** dependencies_out __dparm(0), + size_t* numDependencies_out __dparm(0)); + +/** + * @brief Get stream's capture state + * + * @param [in] stream - Stream of which to get capture status from. + * @param [out] pCaptureStatus - Returns current capture status. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorStreamCaptureImplicit + * + */ +hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus); + +/** + * @brief Update the set of dependencies in a capturing stream + * + * @param [in] stream Stream that is being captured. + * @param [in] dependencies Pointer to an array of nodes to add/replace. + * @param [in] numDependencies Size of the dependencies array. + * @param [in] flags Flag to update dependency set. Should be one of the values + * in enum #hipStreamUpdateCaptureDependenciesFlags. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorIllegalState + * + */ +hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, hipGraphNode_t* dependencies, + size_t numDependencies, + unsigned int flags __dparm(0)); + +/** + * @brief Swaps the stream capture mode of a thread. + * + * @param [in] mode - Pointer to mode value to swap with the current mode. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode); + +/** + * @brief Creates a graph + * + * @param [out] pGraph - pointer to graph to create. + * @param [in] flags - flags for graph creation, must be 0. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags); + +/** + * @brief Destroys a graph + * + * @param [in] graph - instance of graph to destroy. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroy(hipGraph_t graph); + +/** + * @brief Adds dependency edges to a graph. + * + * @param [in] graph - Instance of the graph to add dependencies to. + * @param [in] from - Pointer to the graph nodes with dependencies to add from. + * @param [in] to - Pointer to the graph nodes to add dependencies to. + * @param [in] numDependencies - Number of dependencies to add. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Removes dependency edges from a graph. + * + * @param [in] graph - Instance of the graph to remove dependencies from. + * @param [in] from - Array of nodes that provide the dependencies. + * @param [in] to - Array of dependent nodes. + * @param [in] numDependencies - Number of dependencies to remove. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies); + +/** + * @brief Returns a graph's dependency edges. + * + * @param [in] graph - Instance of the graph to get the edges from. + * @param [out] from - Pointer to the graph nodes to return edge endpoints. + * @param [out] to - Pointer to the graph nodes to return edge endpoints. + * @param [out] numEdges - Returns number of edges. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * from and to may both be NULL, in which case this function only returns the number of edges in + * numEdges. Otherwise, numEdges entries will be filled in. 
If numEdges is higher than the actual + * number of edges, the remaining entries in from and to will be set to NULL, and the number of + * edges actually returned will be written to numEdges. + * + */ +hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, + size_t* numEdges); + +/** + * @brief Returns a graph's nodes. + * + * @param [in] graph - Instance of graph to get the nodes from. + * @param [out] nodes - Pointer to return the graph nodes. + * @param [out] numNodes - Returns the number of graph nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * nodes may be NULL, in which case this function will return the number of nodes in numNodes. + * Otherwise, numNodes entries will be filled in. If numNodes is higher than the actual number of + * nodes, the remaining entries in nodes will be set to NULL, and the number of nodes actually + * obtained will be returned in numNodes. + * + */ +hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes); + +/** + * @brief Returns a graph's root nodes. + * + * @param [in] graph - Instance of the graph to get the nodes from. + * @param [out] pRootNodes - Pointer to return the graph's root nodes. + * @param [out] pNumRootNodes - Returns the number of graph's root nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pRootNodes may be NULL, in which case this function will return the number of root nodes in + * pNumRootNodes. Otherwise, pNumRootNodes entries will be filled in. If pNumRootNodes is higher + * than the actual number of root nodes, the remaining entries in pRootNodes will be set to NULL, + * and the number of nodes actually obtained will be returned in pNumRootNodes. + * + */ +hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes); + +/** + * @brief Returns a node's dependencies. + * + * @param [in] node - Graph node to get the dependencies from. 
+ * @param [out] pDependencies - Pointer to return the dependencies. + * @param [out] pNumDependencies - Returns the number of graph node dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependencies may be NULL, in which case this function will return the number of dependencies in + * pNumDependencies. Otherwise, pNumDependencies entries will be filled in. If pNumDependencies is + * higher than the actual number of dependencies, the remaining entries in pDependencies will be set + * to NULL, and the number of nodes actually obtained will be returned in pNumDependencies. + * + */ +hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, + size_t* pNumDependencies); + +/** + * @brief Returns a node's dependent nodes. + * + * @param [in] node - Graph node to get the dependent nodes from. + * @param [out] pDependentNodes - Pointer to return the graph dependent nodes. + * @param [out] pNumDependentNodes - Returns the number of graph node dependent nodes. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * pDependentNodes may be NULL, in which case this function will return the number of dependent + * nodes in pNumDependentNodes. Otherwise, pNumDependentNodes entries will be filled in. If + * pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in + * pDependentNodes will be set to NULL, and the number of nodes actually obtained will be returned + * in pNumDependentNodes. + * + */ +hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, + size_t* pNumDependentNodes); + +/** + * @brief Returns a node's type. + * + * @param [in] node - Node to get type of. + * @param [out] pType - Returns the node's type. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType); + +/** + * @brief Remove a node from the graph. 
+ * + * @param [in] node - graph node to remove + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphDestroyNode(hipGraphNode_t node); + +/** + * @brief Clones a graph. + * + * @param [out] pGraphClone - Returns newly created cloned graph. + * @param [in] originalGraph - original graph to clone from. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + * + */ +hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph); + +/** + * @brief Finds a cloned version of a node. + * + * @param [out] pNode - Returns the cloned node. + * @param [in] originalNode - original node handle. + * @param [in] clonedGraph - Cloned graph to query. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph); + +/** + * @brief Creates an executable graph from a graph + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [out] pErrorNode - Pointer to error node. In case an error occurred during + * graph instantiation, it could modify the corresponding node. + * @param [out] pLogBuffer - Pointer to log buffer. + * @param [in] bufferSize - Size of the log buffer. + * + * @returns #hipSuccess, #hipErrorOutOfMemory + * + */ +hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [in] flags - Flags to control instantiation. + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @warning This API does not support any flags and behaves as hipGraphInstantiate.
+ */ +hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags); + +/** + * @brief Creates an executable graph from a graph. + * + * @param [out] pGraphExec - Pointer to instantiated executable graph. + * @param [in] graph - Instance of graph to instantiate. + * @param [in] instantiateParams - Graph instantiation Params + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphInstantiateParams* instantiateParams); +/** + * @brief Launches an executable graph in the specified stream. + * + * @param [in] graphExec - Instance of executable graph to launch. + * @param [in] stream - Instance of stream in which to launch executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Uploads an executable graph to a stream + * + * @param [in] graphExec - Instance of executable graph to be uploaded. + * @param [in] stream - Instance of stream to which the executable graph is uploaded to. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream); + +/** + * @brief Creates a kernel execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to kernel graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies on the kernel execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] nodeParams - Pointer to the node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue. 
+ * + */ +hipError_t hipGraphAddNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraphNodeParams* nodeParams); + +/** + * @brief Returns the flags of an executable graph. + * + * @param [in] graphExec - Executable graph to get the flags from. + * @param [out] flags - Flags used to instantiate this executable graph. + * @returns #hipSuccess, #hipErrorInvalidValue. + * + */ +hipError_t hipGraphExecGetFlags(hipGraphExec_t graphExec, unsigned long long* flags); + +/** + * @brief Updates parameters of a graph's node. + * + * @param [in] node - Instance of the node to set parameters for. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphNodeSetParams(hipGraphNode_t node, hipGraphNodeParams* nodeParams); + +/** + * @brief Updates parameters of an executable graph's node. + * + * @param [in] graphExec - Instance of the executable graph. + * @param [in] node - Instance of the node to set parameters to. + * @param [in] nodeParams - Pointer to the parameters to be set. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction, + * #hipErrorNotSupported. + * + */ +hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t node, + hipGraphNodeParams* nodeParams); + +/** + * @brief Destroys an executable graph + * + * @param [in] graphExec - Instance of executable graph to destroy. + * + * @returns #hipSuccess. + * + */ +hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec); + +// Check whether an executable graph can be updated with a graph and perform the update if possible. +/** + * @brief Check whether an executable graph can be updated with a graph and perform the update if + * possible. + * + * @param [in] hGraphExec - instance of executable graph to update.
+ * @param [in] hGraph - graph that contains the updated parameters. + * @param [out] hErrorNode_out - node which caused the permissibility check to forbid the update. + * @param [out] updateResult_out - Return code whether the graph update was performed. + * @returns #hipSuccess, #hipErrorGraphExecUpdateFailure + * + */ +hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out); + +/** + * @brief Creates a kernel execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - Pointer to the dependencies of the kernel execution node. + * @param [in] numDependencies - The number of the dependencies. + * @param [in] pNodeParams - Pointer to the parameters of the kernel execution node. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction + * + */ +hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Gets kernel node's parameters. + * + * @param [in] node - instance of the node to get parameters from. + * @param [out] pNodeParams - pointer to the parameters + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets a kernel node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a kernel node in the given graphExec.
+ * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - const pointer to the kernel node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams); + +/** + * @brief Creates a memcpy node and adds it to a graph. + * + * @param [out] phGraphNode - Pointer to graph node that is created. + * @param [in] hGraph - Instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] copyParams - const pointer to the parameters for the memory copy. + * @param [in] ctx - context related to current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemcpyNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); +/** + * @brief Creates a memcpy node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] pCopyParams - const pointer to the parameters for the memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams); +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] node - instance of the node to get parameters from. 
+ * @param [out] pNodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] node - instance of the node to set parameters to. + * @param [in] pNodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams); + +/** + * @brief Sets a node's attribute. + * + * @param [in] hNode - Instance of the node to set the attribute of. + * @param [in] attr - The attribute type to be set. + * @param [in] value - const pointer to the attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value); +/** + * @brief Gets a node's attribute. + * + * @param [in] hNode - Instance of the node to get the attribute of. + * @param [in] attr - The attribute type to be queried. + * @param [out] value - Pointer to the returned attribute value. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value); +/** + * @brief Sets the parameters of a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - pointer to the memcpy node parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams); + +/** + * @brief Creates a 1D memcpy node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created.
+ * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - The number of dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to perform a 1-dimensional copy. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, + size_t count, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional + * copy. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] src - Pointer to memory address of the source. + * @param [in] count - Size of the memory to copy. + * @param [in] kind - Type of memory copy. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* src, size_t count, + hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy from a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memcpy execution node. + * @param [in] numDependencies - Number of the dependencies. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy from a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy from a symbol on the + * device.
+ * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] dst - Pointer to memory address of the destination. + * @param [in] symbol - Device symbol address. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memcpy node to copy to a symbol on the device and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies on the memcpy execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + +/** + * @brief Sets a memcpy node's parameters to copy to a symbol on the device. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. 
+ * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind); + + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec to copy to a symbol on the + * device. + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] symbol - Device symbol address. + * @param [in] src - Pointer to memory address of the src. + * @param [in] count - Size of the memory to copy. + * @param [in] offset - Offset from start of symbol in bytes. + * @param [in] kind - Type of memory copy. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, hipMemcpyKind kind); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies on the memset execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pMemsetParams - const pointer to the parameters for the memory set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams); + +/** + * @brief Gets a memset node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams); + +/** + * @brief Sets a memset node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipMemsetParams* pNodeParams); + +/** + * @brief Creates a host execution node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node to. + * @param [in] pDependencies - const pointer to the dependencies of the memset execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Returns a host node's parameters. + * + * @param [in] node - Instance of the node to get parameters of. + * @param [out] pNodeParams - Pointer to the parameters. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams); + +/** + * @brief Sets a host node's parameters. + * + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams); + +/** + * @brief Sets the parameters for a host node in the given graphExec. + * + * @param [in] hGraphExec - Instance of the executable graph with the node. + * @param [in] node - Instance of the node to set parameters of. + * @param [in] pNodeParams - Pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipHostNodeParams* pNodeParams); + +/** + * @brief Creates a child graph node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the dependencies of the memset execution node. + * @param [in] numDependencies - Number of dependencies. + * @param [in] childGraph - Graph to clone into this node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraph_t childGraph); + +/** + * @brief Gets a handle to the embedded graph of a child graph node. + * + * @param [in] node - Instance of the node to get child graph of. + * @param [out] pGraph - Pointer to get the graph. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph); + +/** + * @brief Updates node parameters in the child graph node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] node - node from the graph which was used to instantiate graphExec. + * @param [in] childGraph - child graph with updated parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipGraph_t childGraph); + +/** + * @brief Creates an empty node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies); + + +/** + * @brief Creates an event record node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node is added to. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event of the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + +/** + * @brief Returns the event associated with an event record node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event record node's event. + * + * @param [in] node - Instance of the node to set event to. + * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event record node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - node from the graph which was used to instantiate graphExec. + * @param [in] event - pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates an event wait node and adds it to a graph. + * + * @param [out] pGraphNode - Pointer to graph node that is created. + * @param [in] graph - Instance of the graph the node to be added. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - Number of dependencies. + * @param [in] event - Event for the node. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event); + + +/** + * @brief Returns the event associated with an event wait node. + * + * @param [in] node - Instance of the node to get event of. + * @param [out] event_out - Pointer to return the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out); + +/** + * @brief Sets an event wait node's event. + * + * @param [in] node - Instance of the node to set event of. 
+ * @param [in] event - Pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event); + +/** + * @brief Sets the event for an event wait node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - node from the graph which was used to instantiate graphExec. + * @param [in] event - pointer to the event. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event); + +/** + * @brief Creates a memory allocation node and adds it to a graph + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph the node is added to + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in, out] pNodeParams - Node parameters for memory allocation, returns a pointer to the + * allocated memory.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Returns parameters for memory allocation node + * + * @param [in] node - Memory allocation node to query + * @param [out] pNodeParams - Parameters for the specified memory allocation node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, hipMemAllocNodeParams* pNodeParams); + +/** + * @brief Creates a memory free node and adds it to a graph + * + * @param [out] pGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] graph - Instance of the graph node to be added + * @param [in] pDependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dev_ptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dev_ptr); + +/** + * @brief Returns parameters for memory free node + * + * @param [in] node - Memory free node to query + * @param [out] dev_ptr - Device pointer of the specified memory free node + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr); + +/** + * @brief Get the mem attribute for graphs. + * + * @param [in] device - Device to get attributes from + * @param [in] attr - Attribute type to be queried + * @param [out] value - Value of the queried attribute + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Set the mem attribute for graphs. 
+ * + * @param [in] device - Device to set attribute of. + * @param [in] attr - Attribute type to be set. + * @param [in] value - Value of the attribute. + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value); + +/** + * @brief Free unused memory reserved for graphs on a specific device and return it back to the OS. + * + * @param [in] device - Device for which memory should be trimmed + * @returns #hipSuccess, #hipErrorInvalidDevice + * + */ +hipError_t hipDeviceGraphMemTrim(int device); + +/** + * @brief Create an instance of userObject to manage lifetime of a resource. + * + * @param [out] object_out - pointer to instance of userobj. + * @param [in] ptr - pointer to pass to destroy function. + * @param [in] destroy - destroy callback to remove resource. + * @param [in] initialRefcount - initial number of references to the resource. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags); + +/** + * @brief Release number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be released. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain number of references to resource. + * + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be retained. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)); + +/** + * @brief Retain user object for graphs. + * + * @param [in] graph - pointer to graph to retain the user object for.
+ * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be retained. + * @param [in] flags - flags passed to API. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1), unsigned int flags __dparm(0)); + +/** + * @brief Release user object from graphs. + * + * @param [in] graph - pointer to graph to release the user object from. + * @param [in] object - pointer to instance of userobj. + * @param [in] count - number of references to the resource to be released. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, + unsigned int count __dparm(1)); + +/** + * @brief Write a DOT file describing graph structure. + * + * @param [in] graph - graph object for which DOT file has to be generated. + * @param [in] path - path to write the DOT file. + * @param [in] flags - Flags from hipGraphDebugDotFlags to get additional node information. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorOperatingSystem + * + */ +hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, unsigned int flags); + +/** + * @brief Copies attributes from source node to destination node. + * + * Copies attributes from source node to destination node. + * Both nodes must have the same context. + * + * @param [out] hDst - Destination node. + * @param [in] hSrc - Source node. + * For list of attributes see ::hipKernelNodeAttrID. + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + */ +hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, hipGraphNode_t hDst); + +/** + * @brief Enables or disables the specified node in the given graphExec + * + * Sets hNode to be either enabled or disabled. Disabled nodes are functionally equivalent + * to empty nodes until they are reenabled.
Existing node parameters are not affected by + * disabling/enabling the node. + * + * The node is identified by the corresponding hNode in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] isEnabled - Node is enabled if != 0, otherwise the node is disabled. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int isEnabled); +/** + * @brief Query whether a node in the given graphExec is enabled + * + * Sets isEnabled to 1 if hNode is enabled, or 0 if it is disabled. + * + * The node is identified by the corresponding node in the non-executable graph, from which the + * executable graph was instantiated. + * + * hNode must not have been removed from the original graph. + * + * @note Currently only kernel, memset and memcpy nodes are supported. + * + * @param [in] hGraphExec - The executable graph in which to query the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] isEnabled - Location to return the enabled status of the node. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled); + +/** + * @brief Creates an external semaphore wait node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the node dependencies.
+ * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresWaitNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Creates an external semaphore signal node and adds it to a graph. + * + * @param [out] pGraphNode - pointer to the graph node to create. + * @param [in] graph - instance of the graph to add the created node. + * @param [in] pDependencies - const pointer to the node dependencies. + * @param [in] numDependencies - the number of the dependencies. + * @param [in] nodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphAddExternalSemaphoresSignalNode( + hipGraphNode_t* pGraphNode, hipGraph_t graph, const hipGraphNode_t* pDependencies, + size_t numDependencies, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore signal node. + * + * @param [in] hNode - Node whose parameters are to be updated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node. + * + * @param [in] hNode - Node whose parameters are to be updated. + * @param [in] nodeParams - Pointer to the params to be set.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeSetParams( + hipGraphNode_t hNode, const hipExternalSemaphoreWaitNodeParams* nodeParams); +/** + * @brief Returns external semaphore signal node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresSignalNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreSignalNodeParams* params_out); +/** + * @brief Returns external semaphore wait node params. + * + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [out] params_out - Pointer to params. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExternalSemaphoresWaitNodeGetParams( + hipGraphNode_t hNode, hipExternalSemaphoreWaitNodeParams* params_out); +/** + * @brief Updates node parameters in the external semaphore signal node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresSignalNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreSignalNodeParams* nodeParams); +/** + * @brief Updates node parameters in the external semaphore wait node in the given graphExec. + * + * @param [in] hGraphExec - The executable graph in which to set the specified node. + * @param [in] hNode - Node from the graph from which graphExec was instantiated. + * @param [in] nodeParams - Pointer to the params to be set. 
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphExecExternalSemaphoresWaitNodeSetParams( + hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipExternalSemaphoreWaitNodeParams* nodeParams); + +/** + * @brief Gets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to get parameters from. + * @param [out] nodeParams - pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* nodeParams); + +/** + * @brief Sets a memcpy node's parameters. + * + * @param [in] hNode - instance of the node to set parameters for. + * @param [in] nodeParams - const pointer to the parameters. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams); + +/** + * @brief Creates a memset node and adds it to a graph. + * + * @param [out] phGraphNode - pointer to graph node to create. + * @param [in] hGraph - instance of graph to add the created node to. + * @param [in] dependencies - const pointer to the dependencies on the memset execution node. + * @param [in] numDependencies - number of the dependencies. + * @param [in] memsetParams - const pointer to the parameters for the memory set. + * @param [in] ctx - context related to current device.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemsetNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +/** + * @brief Creates a memory free node and adds it to a graph + * + * @param [out] phGraphNode - Pointer to the graph node to create and add to the graph + * @param [in] hGraph - Instance of the graph the node is added to + * @param [in] dependencies - Const pointer to the node dependencies + * @param [in] numDependencies - The number of dependencies + * @param [in] dptr - Pointer to the memory to be freed + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphAddMemFreeNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, + const hipGraphNode_t* dependencies, size_t numDependencies, + hipDeviceptr_t dptr); + +/** + * @brief Sets the parameters for a memcpy node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] copyParams - const pointer to the memcpy node params. + * @param [in] ctx - context related to current device. + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const HIP_MEMCPY3D* copyParams, hipCtx_t ctx); + +/** + * @brief Sets the parameters for a memset node in the given graphExec. + * + * @param [in] hGraphExec - instance of the executable graph with the node. + * @param [in] hNode - instance of the node to set parameters to. + * @param [in] memsetParams - pointer to the parameters. + * @param [in] ctx - context related to current device.
+ * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipDrvGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + const hipMemsetParams* memsetParams, hipCtx_t ctx); + +// doxygen end graph API +/** + * @} + */ + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Virtual Virtual Memory Management + * @{ + * This section describes the virtual memory management functions of HIP runtime API. + * + * @note Please note, the virtual memory management functions of HIP runtime + * API are implemented on Linux, under development on Windows. The + * following Virtual Memory Management APIs are not (yet) + * supported in HIP: + * - hipMemMapArrayAsync + */ + +/** + * @brief Frees an address range reservation made via hipMemAddressReserve + * + * @param [in] devPtr - starting address of the range. + * @param [in] size - size of the range. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressFree(void* devPtr, size_t size); + +/** + * @brief Reserves an address range + * + * @param [out] ptr - starting address of the reserved range. + * @param [in] size - size of the reservation. + * @param [in] alignment - alignment of the address. + * @param [in] addr - requested starting address of the range. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. 
+ * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr, + unsigned long long flags); + +/** + * @brief Creates a memory allocation described by the properties and size + * + * @param [out] handle - value of the returned handle. + * @param [in] size - size of the allocation. + * @param [in] prop - properties of the allocation. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size, + const hipMemAllocationProp* prop, unsigned long long flags); + +/** + * @brief Exports an allocation to a requested shareable handle type. + * + * @param [out] shareableHandle - value of the returned handle. + * @param [in] handle - handle to share. + * @param [in] handleType - type of the shareable handle. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemExportToShareableHandle(void* shareableHandle, + hipMemGenericAllocationHandle_t handle, + hipMemAllocationHandleType handleType, + unsigned long long flags); + +/** + * @brief Get the access flags set for the given location and ptr. + * + * @param [out] flags - flags for this location. + * @param [in] location - target location. + * @param [in] ptr - address to check the access flags. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr); + +/** + * @brief Calculates either the minimal or recommended granularity. + * + * @param [out] granularity - returned granularity. + * @param [in] prop - location properties. + * @param [in] option - determines which granularity to return. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + * + */ +hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop, + hipMemAllocationGranularity_flags option); + +/** + * @brief Retrieve the property structure of the given handle. + * + * @param [out] prop - properties of the given handle. + * @param [in] handle - handle to perform the query on. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, + hipMemGenericAllocationHandle_t handle); + +/** + * @brief Imports an allocation from a requested shareable handle type. + * + * @param [out] handle - returned value. + * @param [in] osHandle - shareable handle representing the memory allocation. + * @param [in] shHandleType - handle type. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle, + hipMemAllocationHandleType shHandleType); + +/** + * @brief Maps an allocation handle to a reserved virtual address range. + * + * @param [in] ptr - address where the memory will be mapped. + * @param [in] size - size of the mapping. + * @param [in] offset - offset into the memory, currently must be zero. + * @param [in] handle - memory allocation to be mapped. + * @param [in] flags - currently unused, must be zero. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle, + unsigned long long flags); + +/** + * @brief Maps or unmaps subregions of sparse HIP arrays and sparse HIP mipmapped arrays. + * + * @param [in] mapInfoList - list of hipArrayMapInfo. + * @param [in] count - number of hipArrayMapInfo in mapInfoList. + * @param [in] stream - stream identifier for the stream to use for map or unmap operations. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is under development. Currently it is not supported on AMD + * GPUs and returns #hipErrorNotSupported. + */ +hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count, + hipStream_t stream); + +/** + * @brief Release a memory handle representing a memory allocation which was previously allocated + * through hipMemCreate. 
+ * + * @param [in] handle - handle of the memory allocation. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle); + +/** + * @brief Returns the allocation handle of the backing memory allocation given the address. + * + * @param [out] handle - handle representing addr. + * @param [in] addr - address to look up. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr); + +/** + * @brief Set the access flags for each location specified in desc for the given virtual address + * range. + * + * @param [in] ptr - starting address of the virtual address range. + * @param [in] size - size of the range. + * @param [in] desc - array of hipMemAccessDesc. + * @param [in] count - number of hipMemAccessDesc in desc. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count); + +/** + * @brief Unmap memory allocation of a given address range. + * + * @param [in] ptr - starting address of the range to unmap. + * @param [in] size - size of the virtual address range. 
+ * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotSupported + * @warning This API is marked as Beta. While this feature is complete, it can + * change and might have outstanding issues. + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +hipError_t hipMemUnmap(void* ptr, size_t size); + +// doxygen end virtual memory management API +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup GraphicsInterop Graphics Interoperability + * @{ + * This section describes graphics interoperability functions of HIP runtime API. + */ + +/** + * @brief Maps a graphics resource for access. + * + * @param [in] count - Number of resources to map. + * @param [in] resources - Pointer of resources to map. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorInvalidResourceHandle + * + */ +hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Get an array through which to access a subresource of a mapped graphics resource. + * + * @param [out] array - Pointer of array through which a subresource of resource may be accessed. + * @param [in] resource - Mapped resource to access. + * @param [in] arrayIndex - Array index for the subresource to access. + * @param [in] mipLevel - Mipmap level for the subresource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @note In this API, the value of arrayIndex higher than zero is currently not supported. + * + */ +hipError_t hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel); +/** + * @brief Gets device accessible address of a graphics resource. 
+ * + * @param [out] devPtr - Pointer of device through which graphic resource may be accessed. + * @param [out] size - Size of the buffer accessible from devPtr. + * @param [in] resource - Mapped resource to access. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, + hipGraphicsResource_t resource); +/** + * @brief Unmaps graphics resources. + * + * @param [in] count - Number of resources to unmap. + * @param [in] resources - Pointer of resources to unmap. + * @param [in] stream - Stream for synchronization. + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorUnknown, #hipErrorContextIsDestroyed + * + */ +hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream __dparm(0)); +/** + * @brief Unregisters a graphics resource. + * + * @param [in] resource - Graphics resources to unregister. + * + * @returns #hipSuccess + * + */ +hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource); +// doxygen end GraphicsInterop +/** + * @} + */ + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Surface Surface Object + * @{ + * + * This section describes surface object functions of HIP runtime API. + * + * @note APIs in this section are under development. + * + */ + +/** + * @brief Create a surface object. + * + * @param [out] pSurfObject Pointer of surface object to be created. + * @param [in] pResDesc Pointer of surface object descriptor. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); +/** + * @brief Destroy a surface object. + * + * @param [in] surfaceObject Surface object to be destroyed. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +// end of surface +/** + * @} + */ + +/** + * @brief Enable HIP runtime logging. + * + * This function enables the HIP runtime logging mechanism, allowing diagnostic + * and trace information to be captured during HIP API execution. + * + * @returns #hipSuccess + * + * @see hipExtDisableLogging, hipExtSetLoggingParams + */ +hipError_t hipExtEnableLogging(); +/** + * @brief Disable HIP runtime logging. + * + * This function disables the HIP runtime logging mechanism, stopping the capture + * of diagnostic and trace information during HIP API execution. + * + * @returns #hipSuccess + * + * @see hipExtEnableLogging, hipExtSetLoggingParams + */ +hipError_t hipExtDisableLogging(); +/** + * @brief Set HIP runtime logging parameters. + * + * This function configures the logging behavior of the HIP runtime, including + * the verbosity level, buffer size, and which components to log. + * + * @param [in] log_level The logging verbosity level. Higher values produce more detailed output. + * @param [in] log_size Reserved for future use. Currently not implemented. + * @param [in] log_mask A bitmask specifying which HIP runtime components to log. 
+ * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipExtEnableLogging, hipExtDisableLogging + */ +hipError_t hipExtSetLoggingParams(size_t log_level, size_t log_size, size_t log_mask); + +#ifdef __cplusplus +} /* extern "c" */ +#endif +#ifdef __cplusplus +#if defined(__clang__) && defined(__HIP__) +template static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f), + dynSharedMemPerBlk, blockSizeLimit); +} +template static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags( + int* gridSize, int* blockSize, T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, + unsigned int flags = 0) { + (void)flags; + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f), + dynSharedMemPerBlk, blockSizeLimit); +} +#endif // defined(__clang__) && defined(__HIP__) + +/** + * @brief Gets the address of a symbol. + * @ingroup Memory + * @param [out] devPtr - Returns device pointer associated with symbol. + * @param [in] symbol - Device symbol. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template hipError_t hipGetSymbolAddress(void** devPtr, const T& symbol) { + return ::hipGetSymbolAddress(devPtr, (const void*)&symbol); +} +/** + * @ingroup Memory + * @brief Gets the size of a symbol. + * + * @param [out] size - Returns the size of a symbol. + * @param [in] symbol - Device symbol address. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template hipError_t hipGetSymbolSize(size_t* size, const T& symbol) { + return ::hipGetSymbolSize(size, (const void*)&symbol); +} + +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device. 
+ * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbol + */ +template +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} +/** + * @ingroup Memory + * @brief Copies data to the given symbol on the device asynchronously on the stream. + * + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyToSymbolAsync + */ +template +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} +/** + * @brief Copies data from the given symbol on the device. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbol + */ +template +hipError_t hipMemcpyFromSymbol(void* dst, const T& symbol, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} +/** + * @brief Copies data from the given symbol on the device asynchronously on the stream. + * @ingroup Memory + * @returns #hipSuccess, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * + * @see hipMemcpyFromSymbolAsync + */ +template +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} + +/** + * @brief Returns occupancy for a kernel function. + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks. 
+ * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, T f, int blockSize, + size_t dynSharedMemPerBlk) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, reinterpret_cast(f), + blockSize, dynSharedMemPerBlk); +} +/** + * @brief Returns occupancy for a device function with the specified flags. + * + * @ingroup Occupancy + * @param [out] numBlocks - Pointer of occupancy in number of blocks. + * @param [in] f - The kernel function to launch on the device. + * @param [in] blockSize - The block size as kernel launched. + * @param [in] dynSharedMemPerBlk - Dynamic shared memory in bytes per block. + * @param [in] flags - Flag to handle the behavior for the occupancy calculator. + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + */ +template inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk, flags); +} +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). 
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * @param [in] flags reserved + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0, unsigned int flags = 0) { + if (min_grid_size == nullptr || block_size == nullptr || + reinterpret_cast(func) == nullptr) { + return hipErrorInvalidValue; + } + + int dev; + hipError_t status; + if ((status = hipGetDevice(&dev)) != hipSuccess) { + return status; + } + + int max_threads_per_cu; + if ((status = hipDeviceGetAttribute(&max_threads_per_cu, + hipDeviceAttributeMaxThreadsPerMultiProcessor, dev)) != + hipSuccess) { + return status; + } + + int warp_size; + if ((status = hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, dev)) != hipSuccess) { + return status; + } + + int max_cu_count; + if ((status = hipDeviceGetAttribute(&max_cu_count, hipDeviceAttributeMultiprocessorCount, dev)) != + hipSuccess) { + return status; + } + + struct hipFuncAttributes attr; + if ((status = hipFuncGetAttributes(&attr, reinterpret_cast(func))) != hipSuccess) { + return status; + } + + // Initial limits for the execution + const int func_max_threads_per_block = attr.maxThreadsPerBlock; + if (block_size_limit == 0) { + block_size_limit = func_max_threads_per_block; + } + + if 
(func_max_threads_per_block < block_size_limit) { + block_size_limit = func_max_threads_per_block; + } + + const int block_size_limit_aligned = + ((block_size_limit + (warp_size - 1)) / warp_size) * warp_size; + + // For maximum search + int max_threads = 0; + int max_block_size{}; + int max_num_blocks{}; + for (int block_size_check_aligned = block_size_limit_aligned; block_size_check_aligned > 0; + block_size_check_aligned -= warp_size) { + // Make sure the logic uses the requested limit and not aligned + int block_size_check = + (block_size_limit < block_size_check_aligned) ? block_size_limit : block_size_check_aligned; + + size_t dyn_smem_size = block_size_to_dynamic_smem_size(block_size_check); + int optimal_blocks; + if ((status = hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + &optimal_blocks, func, block_size_check, dyn_smem_size, flags)) != hipSuccess) { + return status; + } + + int total_threads = block_size_check * optimal_blocks; + if (total_threads > max_threads) { + max_block_size = block_size_check; + max_num_blocks = optimal_blocks; + max_threads = total_threads; + } + + // Break if the logic reached possible maximum + if (max_threads_per_cu == max_threads) { + break; + } + } + + // Grid size is the number of blocks per CU * CU count + *min_grid_size = max_num_blocks * max_cu_count; + *block_size = max_block_size; + + return status; +} + +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). 
+ * + * @param [out] min_grid_size minimum grid size needed to achieve the best potential occupancy + * @param [out] block_size block size required for the best potential occupancy + * @param [in] func device function symbol + * @param [in] block_size_to_dynamic_smem_size - a unary function/functor that takes block size, + * and returns the size, in bytes, of dynamic shared memory needed for a block + * @param [in] block_size_limit the maximum block size \p func is designed to work with. 0 means no + * limit. + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, + * #hipErrorInvalidValue, #hipErrorUnknown + */ +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeVariableSMem( + int* min_grid_size, int* block_size, T func, UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit = 0) { + return hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags( + min_grid_size, block_size, func, block_size_to_dynamic_smem_size, block_size_limit); +} +/** + * @brief Returns grid and block size that achieves maximum potential occupancy for a device + * function + * + * @ingroup Occupancy + * + * Returns in \p *min_grid_size and \p *block_size a suggested grid / + * block size pair that achieves the best potential occupancy + * (i.e. the maximum number of active warps on the current device with the smallest number + * of blocks for a particular function). + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipOccupancyMaxPotentialBlockSize + */ +template inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, + int* blockSize, F kernel, + size_t dynSharedMemPerBlk, + uint32_t blockSizeLimit) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, (hipFunction_t)kernel, + dynSharedMemPerBlk, blockSizeLimit); +} + +/** + * @brief Returns dynamic shared memory available per block when launching numBlocks blocks on SM. 
+ * + * @ingroup Occupancy + * Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory / + * to allow numBlocks blocks per SM. + * + * @param [out] dynamicSmemSize Returned maximum dynamic shared memory. + * @param [in] f Kernel function for which occupancy is calculated. + * @param [in] numBlocks Number of blocks to fit on SM + * @param [in] blockSize Size of the block + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue, + * #hipErrorUnknown + */ +template +inline hipError_t hipOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, F f, + int numBlocks, int blockSize) { + return hipOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, reinterpret_cast(f), + numBlocks, blockSize); +} +/** + * @brief Launches a device function + * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * \tparam T The type of the kernel function. + * + * @param [in] f Kernel function to launch. + * @param [in] gridDim Grid dimensions specified as multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items. + * @param [in] kernelParams A list of kernel arguments. + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for + * this kernel. The HIP-Clang compiler provides + * support for extern shared declarations. + * @param [in] stream Stream on which the kernel is launched. + * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, void** kernelParams, + unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDim, + kernelParams, sharedMemBytes, stream); +} +/** + * @brief Launches kernel function on multiple devices, where thread blocks can + * cooperate and synchronize on execution. 
+ * + * @ingroup Execution + * @ingroup ModuleCooperativeG + * + * @param [in] launchParamsList List of kernel launch parameters, one per device. + * @param [in] numDevices Size of launchParamsList array. + * @param [in] flags Flag to handle launch behavior. + * + * @return #hipSuccess, #hipErrorLaunchFailure, #hipErrorInvalidValue, + * #hipErrorInvalidResourceHandle + * + */ +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other + * threads + * @ingroup Execution + * + * @param [in] launchParamsList List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +template +inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags = 0) { + return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] size Size of memory in bytes. + * + * @warning This API is deprecated. 
+ * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size); +} +/** + * @brief Binds a memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of memory on the device. + * @param [in] desc Texture channel format. + * @param [in] size Size of memory in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, + const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. + * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, size_t width, size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} +/** + * @brief Binds a 2D memory area to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] offset Offset in bytes. + * @param [in] tex Texture to bind. + * @param [in] devPtr Pointer of 2D memory area on the device. + * @param [in] desc Texture channel format. + * @param [in] width Width in texel units. + * @param [in] height Height in texel units. 
+ * @param [in] pitch Pitch in bytes. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTexture2D(size_t* offset, const struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, size_t width, + size_t height, size_t pitch) { + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array) { + struct hipChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err; +} +/** + * @brief Binds an array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] array Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t + hipBindTextureToArray(const struct texture& tex, hipArray_const_t array, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToArray(&tex, array, &desc); +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device. + * + * @warning This API is deprecated. 
+ * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray) { + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err; +} +/** + * @brief Binds a mipmapped array to a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to bind. + * @param [in] mipmappedArray Mipmapped Array of memory on the device. + * @param [in] desc Texture channel format. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipBindTextureToMipmappedArray(const struct texture& tex, + hipMipmappedArray_const_t mipmappedArray, + const struct hipChannelFormatDesc& desc) { + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} +/** + * @brief Unbinds a texture [Deprecated] + * + * @ingroup TextureD + * + * @param [in] tex Texture to unbind. + * + * @warning This API is deprecated. + * + */ +template HIP_DEPRECATED(HIP_DEPRECATED_MSG) +static inline hipError_t hipUnbindTexture(const struct texture& tex) { + return hipUnbindTexture(&tex); +} +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @ingroup StreamO + * @{ + * + * This section describes wrappers for stream Ordered allocation from memory pool functions of + * HIP runtime API. + * + * @note APIs in this section are implemented on Linux, under development on Windows. 
+ * + */ + +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +static inline hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(dev_ptr, size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool on the stream + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. + */ +template +static inline hipError_t hipMallocAsync(T** dev_ptr, size_t size, hipStream_t stream) { + return hipMallocAsync(reinterpret_cast(dev_ptr), size, stream); +} +/** + * @brief C++ wrappers for allocations from a memory pool + * + * This is an alternate C++ calls for @p hipMallocFromPoolAsync made available through + * function overloading. + * + * @see hipMallocFromPoolAsync + * + * @note This API is implemented on Linux and is under development on Microsoft Windows. 
+ */ +template static inline hipError_t hipMallocFromPoolAsync(T** dev_ptr, size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + return hipMallocFromPoolAsync(reinterpret_cast(dev_ptr), size, mem_pool, stream); +} +/** + * @brief Launches a HIP kernel using the specified configuration. + * @ingroup Execution + * + * This function dispatches the provided kernel with the given launch configuration and forwards the + * kernel arguments. + * + * @param [in] config Pointer to the kernel launch configuration structure. + * @param [in] kernel Pointer to the device kernel function to be launched. + * @param [in] args Variadic list of arguments to be passed to the kernel. + * + * @returns #hipSuccess if the kernel is launched successfully, otherwise an appropriate error code. + */ +template +static inline __host__ hipError_t hipLaunchKernelEx(const hipLaunchConfig_t* config, + void (*kernel)(KernelArgs...), + Params&&... args) { + return [&](KernelArgs... convertedArgs) { + void* pArgs[] = {&convertedArgs...}; + return ::hipLaunchKernelExC(config, reinterpret_cast(kernel), pArgs); + }(std::forward(args)...); +} +/** + * @} + */ + + +#endif // __cplusplus + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "hip/nvidia_detail/nvidia_hip_runtime_api.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + + +/** + * @brief: C++ wrapper for hipMalloc + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (ie void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. 
+ * + * @see hipMalloc + */ +#if defined(__cplusplus) && !defined(__HIP_DISABLE_CPP_FUNCTIONS__) +template static inline hipError_t hipMalloc(T** devPtr, size_t size) { + return hipMalloc((void**)devPtr, size); +} +/** + * @brief: C++ wrapper for hipMallocPitch + * @ingroup Memory + * Perform automatic type conversion to eliminate the need for excessive typecasting (ie void**) + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocPitch + */ +template +static inline hipError_t hipMallocPitch(T** devPtr, size_t* pitch, size_t width, size_t height) { + return hipMallocPitch((void**)devPtr, pitch, width, height); +} +/** + * @brief: C++ wrapper for hipHostMalloc + * @ingroup Memory + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipHostMalloc + */ +template +static inline hipError_t hipHostMalloc(T** ptr, size_t size, + unsigned int flags = hipHostMallocDefault) { + return hipHostMalloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipHostAlloc + * @ingroup Memory + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. 
+ * + * @see hipHostAlloc + */ +template static inline hipError_t hipHostAlloc(T** ptr, size_t size, + unsigned int flags = hipHostAllocDefault) { + return hipHostAlloc((void**)ptr, size, flags); +} +/** + * @brief: C++ wrapper for hipMallocManaged + * + * @ingroup MemoryM + * Provide an override to automatically typecast the pointer type from void**, and also provide a + * default for the flags. + * + * __HIP_DISABLE_CPP_FUNCTIONS__ macro can be defined to suppress these + * wrappers. It is useful for applications which need to obtain decltypes of + * HIP runtime APIs. + * + * @see hipMallocManaged + * + */ +template +static inline hipError_t hipMallocManaged(T** devPtr, size_t size, + unsigned int flags = hipMemAttachGlobal) { + return hipMallocManaged((void**)devPtr, size, flags); +} + + +#endif +#endif +// doxygen end HIP API +/** + * @} + */ +#include + +#if USE_PROF_API +#include +#endif diff --git a/external/hip/hip_texture_types.h b/external/hip/hip_texture_types.h new file mode 100644 index 0000000..9cefbe6 --- /dev/null +++ b/external/hip/hip_texture_types.h @@ -0,0 +1,29 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/external/hip/hip_vector_types.h b/external/hip/hip_vector_types.h new file mode 100644 index 0000000..98a0bcd --- /dev/null +++ b/external/hip/hip_vector_types.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//! hip_vector_types.h : Defines the HIP vector types. 
+ +#ifndef HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H + +#include + + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) +#if __cplusplus +#include +#endif +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/hiprtc.h b/external/hip/hiprtc.h new file mode 100644 index 0000000..f4f8b88 --- /dev/null +++ b/external/hip/hiprtc.h @@ -0,0 +1,473 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#pragma once + +#include +#include + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +#ifdef __cplusplus +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if !defined(_WIN32) +#pragma GCC visibility push(default) +#endif + +/** + * + * @addtogroup GlobalDefs + * @{ + * + */ +/** + * hiprtc error code + */ +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, ///< Success + HIPRTC_ERROR_OUT_OF_MEMORY = 1, ///< Out of memory + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, ///< Failed to create program + HIPRTC_ERROR_INVALID_INPUT = 3, ///< Invalid input + HIPRTC_ERROR_INVALID_PROGRAM = 4, ///< Invalid program + HIPRTC_ERROR_INVALID_OPTION = 5, ///< Invalid option + HIPRTC_ERROR_COMPILATION = 6, ///< Compilation error + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, ///< Failed in builtin operation + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, ///< No name expression after compilation + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, ///< No lowered names before compilation + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, ///< Invalid name expression + HIPRTC_ERROR_INTERNAL_ERROR = 11, ///< Internal error + HIPRTC_ERROR_LINKING = 100 ///< Error in linking +} hiprtcResult; +/** + * hiprtc JIT option + */ +#define hiprtcJIT_option hipJitOption +#define HIPRTC_JIT_MAX_REGISTERS \ + hipJitOptionMaxRegisters ///< CUDA Only Maximum registers may be used in a + ///< thread, passed to compiler +#define HIPRTC_JIT_THREADS_PER_BLOCK \ + hipJitOptionThreadsPerBlock ///< CUDA Only Number of thread per block +#define HIPRTC_JIT_WALL_TIME hipJitOptionWallTime ///< CUDA Only Value for total wall clock time +#define HIPRTC_JIT_INFO_LOG_BUFFER \ + hipJitOptionInfoLogBuffer ///< CUDA Only Pointer to the buffer with + ///< logged information +#define HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES \ + 
hipJitOptionInfoLogBufferSizeBytes ///< CUDA Only Size of the buffer + ///< in bytes for logged info +#define HIPRTC_JIT_ERROR_LOG_BUFFER \ + hipJitOptionErrorLogBuffer ///< CUDA Only Pointer to the buffer + ///< with logged error(s) +#define HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES \ + hipJitOptionErrorLogBufferSizeBytes ///< CUDA Only Size of the buffer in + ///< bytes for logged error(s) +#define HIPRTC_JIT_OPTIMIZATION_LEVEL \ + hipJitOptionOptimizationLevel ///< Value of optimization level for + ///< generated codes, acceptable + ///< options -O0, -O1, -O2, -O3 +#define HIPRTC_JIT_TARGET_FROM_HIPCONTEXT \ + hipJitOptionTargetFromContext ///< CUDA Only The target context, + ///< which is the default +#define HIPRTC_JIT_TARGET hipJitOptionTarget ///< CUDA Only JIT target +#define HIPRTC_JIT_FALLBACK_STRATEGY hipJitOptionFallbackStrategy ///< CUDA Only Fallback strategy +#define HIPRTC_JIT_GENERATE_DEBUG_INFO \ + hipJitOptionGenerateDebugInfo ///< CUDA Only Generate debug information +#define HIPRTC_JIT_LOG_VERBOSE hipJitOptionLogVerbose ///< CUDA Only Generate log verbose +#define HIPRTC_JIT_GENERATE_LINE_INFO \ + hipJitOptionGenerateLineInfo ///< CUDA Only Generate line number information +#define HIPRTC_JIT_CACHE_MODE hipJitOptionCacheMode ///< CUDA Only Set cache mode +#define HIPRTC_JIT_NEW_SM3X_OPT hipJitOptionSm3xOpt ///< @deprecated CUDA Only New SM3X option. +#define HIPRTC_JIT_FAST_COMPILE hipJitOptionFastCompile ///< CUDA Only Set fast compile +#define HIPRTC_JIT_GLOBAL_SYMBOL_NAMES \ + hipJitOptionGlobalSymbolNames ///< CUDA Only Array of device symbol names to be + ///< relocated to the host +#define HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS \ + hipJitOptionGlobalSymbolAddresses ///< CUDA Only Array of host addresses to be + ///< relocated to the device +#define HIPRTC_JIT_GLOBAL_SYMBOL_COUNT \ + hipJitOptionGlobalSymbolCount ///< CUDA Only Number of symbol count. 
+#define HIPRTC_JIT_LTO \ + hipJitOptionLto ///< @deprecated CUDA Only Enable link-time + ///< optimization for device code +#define HIPRTC_JIT_FTZ \ + hipJitOptionFtz ///< @deprecated CUDA Only Set + ///< single-precision denormals. +#define HIPRTC_JIT_PREC_DIV \ + hipJitOptionPrecDiv ///< @deprecated CUDA Only Set + ///< single-precision floating-point division + ///< and reciprocals +#define HIPRTC_JIT_PREC_SQRT \ + hipJitOptionPrecSqrt ///< @deprecated CUDA Only Set + ///< single-precision floating-point + ///< square root +#define HIPRTC_JIT_FMA \ + hipJitOptionFma ///< @deprecated CUDA Only Enable + ///< floating-point multiplies and + ///< adds/subtracts operations +#define HIPRTC_JIT_POSITION_INDEPENDENT_CODE \ + hipJitOptionPositionIndependentCode ///< CUDA Only Generates + ///< Position Independent code +#define HIPRTC_JIT_MIN_CTA_PER_SM \ + hipJitOptionMinCTAPerSM ///< CUDA Only Hints to JIT compiler + ///< the minimum number of CTAs from + ///< kernel's grid to be mapped to SM +#define HIPRTC_JIT_MAX_THREADS_PER_BLOCK \ + hipJitOptionMaxThreadsPerBlock ///< CUDA only Maximum number of + ///< threads in a thread block +#define HIPRTC_JIT_OVERRIDE_DIRECT_VALUES \ + hipJitOptionOverrideDirectiveValues ///< CUDA only Override Directive + ///< Values +#define HIPRTC_JIT_NUM_OPTIONS hipJitOptionNumOptions ///< Number of options +#define HIPRTC_JIT_IR_TO_ISA_OPT_EXT \ + hipJitOptionIRtoISAOptExt ///< HIP Only Linker options to be + ///< passed on to compiler +#define HIPRTC_JIT_IR_TO_ISA_OPT_COUNT_EXT \ + hipJitOptionIRtoISAOptCountExt ///< HIP Only Count of linker options + ///< to be passed on to compiler +/** + * hiprtc JIT input type + */ +#define hiprtcJITInputType hipJitInputType +#define HIPRTC_JIT_INPUT_CUBIN hipJitInputCubin ///< Cuda only Input Cubin +#define HIPRTC_JIT_INPUT_PTX hipJitInputPtx ///< Cuda only Input PTX +#define HIPRTC_JIT_INPUT_FATBINARY hipJitInputFatBinary ///< Cuda Only Input FAT Binary +#define HIPRTC_JIT_INPUT_OBJECT \ +
hipJitInputObject ///< Cuda Only Host Object with embedded device code +#define HIPRTC_JIT_INPUT_LIBRARY \ + hipJitInputLibrary ///< Cuda Only Archive of Host Objects with embedded device code +#define HIPRTC_JIT_INPUT_NVVM \ + hipJitInputNvvm ///< @deprecated CUDA only High Level intermediate code for LTO +#define HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES \ + hipJitNumLegacyInputTypes ///< Count of Legacy Input Types +#define HIPRTC_JIT_INPUT_LLVM_BITCODE \ + hipJitInputLLVMBitcode ///< HIP Only LLVM Bitcode or IR assembly +#define HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE \ + hipJitInputLLVMBundledBitcode ///< HIP Only LLVM Clang Bundled Code +#define HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE \ + hipJitInputLLVMArchivesOfBundledBitcode ///< HIP Only LLVM + ///< Archives of + ///< Bundled Bitcode +#define HIPRTC_JIT_INPUT_SPIRV hipJitInputSpirv ///< HIP Only SPIRV Code Object +#define HIPRTC_JIT_NUM_INPUT_TYPES hipJitNumInputTypes ///< Count of Input Types +/** + * @} + */ + +/** + * hiprtc link state + * + */ +typedef struct ihiprtcLinkState* hiprtcLinkState; +/** + * @ingroup Runtime + * + * @brief Returns text string message to explain the error which occurred + * + * @param [in] result code to convert to string. + * @returns const char pointer to the NULL-terminated error string + * + * @warning In HIP, this function returns the name of the error, + * if the hiprtc result is defined, it will return "Invalid HIPRTC error code" + * + * @see hiprtcResult + */ +const char* hiprtcGetErrorString(hiprtcResult result); + +/** + * @ingroup Runtime + * @brief Sets the parameters as major and minor version. + * + * @param [out] major HIP Runtime Compilation major version. + * @param [out] minor HIP Runtime Compilation minor version.
+ * + * @returns #HIPRTC_ERROR_INVALID_INPUT, #HIPRTC_SUCCESS + * + */ +hiprtcResult hiprtcVersion(int* major, int* minor); + +/** + * hiprtc program + * + */ +typedef struct _hiprtcProgram* hiprtcProgram; + +/** + * @ingroup Runtime + * @brief Adds the given name expression to the runtime compilation program. + * + * @param [in] prog runtime compilation program instance. + * @param [in] name_expression const char pointer to the name expression. + * @returns #HIPRTC_SUCCESS + * + * If const char pointer is NULL, it will return #HIPRTC_ERROR_INVALID_INPUT. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression); + +/** + * @ingroup Runtime + * @brief Compiles the given runtime compilation program. + * + * @param [in] prog runtime compilation program instance. + * @param [in] numOptions number of compiler options. + * @param [in] options compiler options as const array of strings. + * @returns #HIPRTC_SUCCESS + * + * If the compiler failed to build the runtime compilation program, + * it will return #HIPRTC_ERROR_COMPILATION. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char* const* options); + +/** + * @ingroup Runtime + * @brief Creates an instance of hiprtcProgram with the given input parameters, + * and sets the output hiprtcProgram prog with it. + * + * @param [in, out] prog runtime compilation program instance. + * @param [in] src const char pointer to the program source. + * @param [in] name const char pointer to the program name. + * @param [in] numHeaders number of headers. + * @param [in] headers array of strings pointing to headers. + * @param [in] includeNames array of strings pointing to names included in program source. + * @returns #HIPRTC_SUCCESS + * + * For any invalid input parameter, it will return #HIPRTC_ERROR_INVALID_INPUT + * or #HIPRTC_ERROR_INVALID_PROGRAM.
+ * + * If failed to create the program, it will return #HIPRTC_ERROR_PROGRAM_CREATION_FAILURE. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char* const* headers, + const char* const* includeNames); + +/** + * @brief Destroys an instance of given hiprtcProgram. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @returns #HIPRTC_SUCCESS + * + * If prog is NULL, it will return #HIPRTC_ERROR_INVALID_INPUT. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog); + +/** + * @brief Gets the lowered (mangled) name from an instance of hiprtcProgram with the given input + * parameters, and sets the output lowered_name with it. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [in] name_expression const char pointer to the name expression. + * @param [in, out] lowered_name const char array to the lowered (mangled) name. + * @returns #HIPRTC_SUCCESS + * + * For any invalid nullptr input parameter, it will return #HIPRTC_ERROR_INVALID_INPUT + * + * If name_expression is not found, it will return #HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID + * + * If failed to get lowered_name from the program, it will return #HIPRTC_ERROR_COMPILATION. + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** lowered_name); + +/** + * @brief Gets the log generated by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] log memory pointer to the generated log. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log); + +/** + * @brief Gets the size of log generated by the runtime compilation program instance. + * + * @param [in] prog runtime compilation program instance.
+ * @param [out] logSizeRet size of generated log. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet); + +/** + * @brief Gets the pointer of compilation binary by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] code char pointer to binary. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); + +/** + * @brief Gets the size of compilation binary by the runtime compilation program instance. + * @ingroup Runtime + * @param [in] prog runtime compilation program instance. + * @param [out] codeSizeRet the size of binary. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); + +/** + * @brief Gets the pointer of compiled bitcode by the runtime compilation program instance. + * + * @param [in] prog runtime compilation program instance. + * @param [out] bitcode char pointer to bitcode. + * @return HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetBitcode(hiprtcProgram prog, char* bitcode); + +/** + * @brief Gets the size of compiled bitcode by the runtime compilation program instance. + * @ingroup Runtime + * + * @param [in] prog runtime compilation program instance. + * @param [out] bitcode_size the size of bitcode. + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcGetBitcodeSize(hiprtcProgram prog, size_t* bitcode_size); + +/** + * @brief Creates the link instance via hiprtc APIs. 
+ * @ingroup Runtime + * @param [in] num_options Number of options + * @param [in] option_ptr Array of options + * @param [in] option_vals_pptr Array of option values cast to void* + * @param [out] hip_link_state_ptr hiprtc link state created upon success + * + * @returns #HIPRTC_SUCCESS, #HIPRTC_ERROR_INVALID_INPUT, #HIPRTC_ERROR_INVALID_OPTION + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* option_ptr, + void** option_vals_pptr, hiprtcLinkState* hip_link_state_ptr); + +/** + * @brief Adds a file with bit code to be linked with options + * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [in] input_type Type of the input data or bitcode + * @param [in] file_path Path to the input file where bitcode is present + * @param [in] num_options Size of the options + * @param [in] options_ptr Array of options applied to this input + * @param [in] option_values Array of option values cast to void* + * + * @returns #HIPRTC_SUCCESS + * + * If input values are invalid, it will + * @return #HIPRTC_ERROR_INVALID_INPUT + * + * @see hiprtcResult + */ + +hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + const char* file_path, unsigned int num_options, + hiprtcJIT_option* options_ptr, void** option_values); + +/** + * @brief Adds input data (image) to be linked with options
+ * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [in] input_type Type of the input data or bitcode + * @param [in] image Input data which is null terminated + * @param [in] image_size Size of the input data + * @param [in] name Optional name for this input + * @param [in] num_options Size of the options + * @param [in] options_ptr Array of options applied to this input + * @param [in] option_values Array of option values cast to void* + * + * @returns #HIPRTC_SUCCESS, #HIPRTC_ERROR_INVALID_INPUT + * + * If adding the data fails, it will + * @return #HIPRTC_ERROR_PROGRAM_CREATION_FAILURE + * + * @see hiprtcResult + */ + +hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + void* image, size_t image_size, const char* name, + unsigned int num_options, hiprtcJIT_option* options_ptr, + void** option_values); + +/** + * @brief Completes the linking of the given program. + * @ingroup Runtime + * @param [in] hip_link_state hiprtc link state + * @param [out] bin_out Upon success, points to the output binary + * @param [out] size_out Size of the binary is stored (optional) + * + * @returns #HIPRTC_SUCCESS + * + * If linking fails, it will + * @return #HIPRTC_ERROR_LINKING + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out); + +/** + * @brief Deletes the link instance via hiprtc APIs.
+ * @ingroup Runtime + * @param [in] hip_link_state link state instance + * + * @returns #HIPRTC_SUCCESS + * + * @see hiprtcResult + */ +hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state); + +#if !defined(_WIN32) +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif diff --git a/external/hip/library_types.h b/external/hip/library_types.h new file mode 100644 index 0000000..c3c8d5d --- /dev/null +++ b/external/hip/library_types.h @@ -0,0 +1,84 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H +#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H + +#if !defined(__HIPCC_RTC__) +#include +#endif + +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +typedef enum hipDataType { + HIP_R_32F = 0, + HIP_R_64F = 1, + HIP_R_16F = 2, + HIP_R_8I = 3, + HIP_C_32F = 4, + HIP_C_64F = 5, + HIP_C_16F = 6, + HIP_C_8I = 7, + HIP_R_8U = 8, + HIP_C_8U = 9, + HIP_R_32I = 10, + HIP_C_32I = 11, + HIP_R_32U = 12, + HIP_C_32U = 13, + HIP_R_16BF = 14, + HIP_C_16BF = 15, + HIP_R_4I = 16, + HIP_C_4I = 17, + HIP_R_4U = 18, + HIP_C_4U = 19, + HIP_R_16I = 20, + HIP_C_16I = 21, + HIP_R_16U = 22, + HIP_C_16U = 23, + HIP_R_64I = 24, + HIP_C_64I = 25, + HIP_R_64U = 26, + HIP_C_64U = 27, + HIP_R_8F_E4M3 = 28, + HIP_R_8F_E5M2 = 29, + HIP_R_8F_UE8M0 = 30, + HIP_R_6F_E2M3 = 31, + HIP_R_6F_E3M2 = 32, + HIP_R_4F_E2M1 = 33, + // HIP specific Data Types + HIP_R_8F_E4M3_FNUZ = 1000, + HIP_R_8F_E5M2_FNUZ = 1001, +} hipDataType; + +typedef enum hipLibraryPropertyType { + HIP_LIBRARY_MAJOR_VERSION, + HIP_LIBRARY_MINOR_VERSION, + HIP_LIBRARY_PATCH_LEVEL +} hipLibraryPropertyType; + +#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#include "library_types.h" +#else +#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif + +#endif diff --git a/external/hip/linker_types.h b/external/hip/linker_types.h new file mode 100755 index 0000000..1131910 --- /dev/null +++ b/external/hip/linker_types.h @@ -0,0 +1,138 @@ + +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_LINKER_TYPES_H +#define HIP_INCLUDE_HIP_LINKER_TYPES_H + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#endif + + +#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) +#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + +/** + * @defgroup LinkerTypes Jit Linker Data Types + * @{ + * This section describes the Jit Linker data types. 
+ *
+ */
+
+/**
+ * hipJitOption
+ */
+typedef enum hipJitOption {
+  hipJitOptionMaxRegisters = 0, ///< CUDA Only Maximum registers may be used in a thread,
+                                ///< passed to compiler
+  hipJitOptionThreadsPerBlock, ///< CUDA Only Number of threads per block
+  hipJitOptionWallTime, ///< CUDA Only Value for total wall clock time
+  hipJitOptionInfoLogBuffer, ///< CUDA Only Pointer to the buffer with logged information
+  hipJitOptionInfoLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged info
+  hipJitOptionErrorLogBuffer, ///< CUDA Only Pointer to the buffer with logged error(s)
+  hipJitOptionErrorLogBufferSizeBytes, ///< CUDA Only Size of the buffer in bytes for logged
+                                       ///< error(s)
+  hipJitOptionOptimizationLevel, ///< Value of optimization level for generated codes, acceptable
+                                 ///< options -O0, -O1, -O2, -O3
+  hipJitOptionTargetFromContext, ///< CUDA Only The target context, which is the default
+  hipJitOptionTarget, ///< CUDA Only JIT target
+  hipJitOptionFallbackStrategy, ///< CUDA Only Fallback strategy
+  hipJitOptionGenerateDebugInfo, ///< CUDA Only Generate debug information
+  hipJitOptionLogVerbose, ///< CUDA Only Generate verbose log
+  hipJitOptionGenerateLineInfo, ///< CUDA Only Generate line number information
+  hipJitOptionCacheMode, ///< CUDA Only Set cache mode
+  hipJitOptionSm3xOpt, ///< @deprecated CUDA Only New SM3X option.
+  hipJitOptionFastCompile, ///< CUDA Only Set fast compile
+  hipJitOptionGlobalSymbolNames, ///< CUDA Only Array of device symbol names to be relocated to the
+                                 ///< host
+  hipJitOptionGlobalSymbolAddresses, ///< CUDA Only Array of host addresses to be relocated to the
+                                     ///< device
+  hipJitOptionGlobalSymbolCount, ///< CUDA Only Number of symbols.
+  hipJitOptionLto, ///< @deprecated CUDA Only Enable link-time optimization for device code
+  hipJitOptionFtz, ///< @deprecated CUDA Only Set single-precision denormals.
+  hipJitOptionPrecDiv, ///< @deprecated CUDA Only Set single-precision floating-point division
+                       ///< and reciprocals
+  hipJitOptionPrecSqrt, ///< @deprecated CUDA Only Set single-precision floating-point square root
+  hipJitOptionFma, ///< @deprecated CUDA Only Enable floating-point multiplies and
+                   ///< adds/subtracts operations
+  hipJitOptionPositionIndependentCode, ///< CUDA Only Generates Position Independent code
+  hipJitOptionMinCTAPerSM, ///< CUDA Only Hints to JIT compiler the minimum number of CTAs from
+                           ///< kernel's grid to be mapped to SM
+  hipJitOptionMaxThreadsPerBlock, ///< CUDA Only Maximum number of threads in a thread block
+  hipJitOptionOverrideDirectiveValues, ///< CUDA Only Override Directive values
+  hipJitOptionNumOptions, ///< Number of options above; the *Ext options below are numbered separately, starting at 10000
+  hipJitOptionIRtoISAOptExt = 10000, ///< HIP Only Linker options to be passed on to compiler
+  hipJitOptionIRtoISAOptCountExt, ///< HIP Only Count of linker options to be passed on to compiler
+} hipJitOption;
+/**
+ * hipJitInputType
+ */
+typedef enum hipJitInputType {
+  hipJitInputCubin = 0, ///< CUDA Only Input cubin
+  hipJitInputPtx, ///< CUDA Only Input PTX
+  hipJitInputFatBinary, ///< CUDA Only Input FAT Binary
+  hipJitInputObject, ///< CUDA Only Host Object with embedded device code
+  hipJitInputLibrary, ///< CUDA Only Archive of Host Objects with embedded
+                      ///< device code
+  hipJitInputNvvm, ///< @deprecated CUDA Only High Level intermediate
+                   ///< code for LTO
+  hipJitNumLegacyInputTypes, ///< Count of Legacy Input Types (the six enumerators above, so its value is 6)
+  hipJitInputLLVMBitcode = 100, ///< HIP Only LLVM Bitcode or IR assembly
+  hipJitInputLLVMBundledBitcode = 101, ///< HIP Only LLVM Clang Bundled Code
+  hipJitInputLLVMArchivesOfBundledBitcode = 102, ///< HIP Only LLVM Archive of Bundled Bitcode
+  hipJitInputSpirv = 103, ///< HIP Only SPIRV Code Object
+  hipJitNumInputTypes = 10 ///< Count of Input Types (6 legacy + 4 HIP-only); set explicitly because the HIP-only values start at 100
+} hipJitInputType;
+/**
+ * hipJitCacheMode - cache mode values; presumably the argument for hipJitOptionCacheMode (TODO confirm)
+ */
+typedef enum hipJitCacheMode {
+  hipJitCacheOptionNone = 0,
+  hipJitCacheOptionCG,
+  hipJitCacheOptionCA
+} hipJitCacheMode;
+/**
+ * hipJitFallback - fallback values; presumably the argument for hipJitOptionFallbackStrategy (TODO confirm)
+ */
+typedef enum hipJitFallback {
+  hipJitPreferPTX = 0,
+  hipJitPreferBinary,
+} hipJitFallback;
+
+typedef enum hipLibraryOption_e {
+  hipLibraryHostUniversalFunctionAndDataTable = 0,
+  hipLibraryBinaryIsPreserved = 1
+} hipLibraryOption;
+
+// doxygen end LinkerTypes
+/**
+ * @}
+ */
+
+#else
+#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+
+#endif // HIP_INCLUDE_HIP_LINKER_TYPES_H
\ No newline at end of file
diff --git a/external/hip/math_functions.h b/external/hip/math_functions.h
new file mode 100644
index 0000000..896c861
--- /dev/null
+++ b/external/hip/math_functions.h
@@ -0,0 +1,42 @@
+/*
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
+#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
+
+// Some standard header files. These are included by hc.hpp, so we want to make them available on
+// both paths to provide a consistent include environment and avoid "missing symbol" errors that
+// only appear on the NVCC path:
+
+#if !defined(__HIPCC_RTC__)
+#include // NOTE(review): header name missing in this copy of the patch (text in angle brackets appears to have been stripped)
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+#include // NOTE(review): header name missing in this copy of the patch; this is the AMD-only branch
+#elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+// #include
+#else // both platform macros are defined, or neither is
+#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+
+#endif // HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
diff --git a/external/hip/surface_types.h b/external/hip/surface_types.h
new file mode 100644
index 0000000..d5cc457
--- /dev/null
+++ b/external/hip/surface_types.h
@@ -0,0 +1,65 @@
+/*
+Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+ * @file surface_types.h
+ * @brief Defines surface types for HIP runtime.
+ */
+
+#ifndef HIP_INCLUDE_HIP_SURFACE_TYPES_H
+#define HIP_INCLUDE_HIP_SURFACE_TYPES_H
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier" // e.g. struct __hip_surface below
+#endif
+
+#if !defined(__HIPCC_RTC__)
+#include // NOTE(review): header name missing in this copy of the patch (text in angle brackets appears to have been stripped)
+#endif
+
+/**
+ * An opaque handle that represents a HIP surface object (struct __hip_surface is only declared here, never defined)
+ */
+struct __hip_surface;
+typedef struct __hip_surface* hipSurfaceObject_t;
+
+/**
+ * HIP surface reference: holds a single hipSurfaceObject_t handle
+ */
+struct surfaceReference {
+  hipSurfaceObject_t surfaceObject;
+};
+
+/**
+ * HIP surface boundary modes
+ */
+enum hipSurfaceBoundaryMode {
+  hipBoundaryModeZero = 0,
+  hipBoundaryModeTrap = 1,
+  hipBoundaryModeClamp = 2
+};
+
+#if defined(__clang__)
+#pragma clang diagnostic pop // matches the push near the top of this header
+#endif
+
+#endif /* !HIP_INCLUDE_HIP_SURFACE_TYPES_H */
diff --git a/external/hip/texture_types.h b/external/hip/texture_types.h
new file mode 100644
index 0000000..65290cd
--- /dev/null
+++ b/external/hip/texture_types.h
@@ -0,0 +1,193 @@
+/*
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H
+#define HIP_INCLUDE_HIP_TEXTURE_TYPES_H
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier" // e.g. struct __hip_texture below
+#pragma clang diagnostic ignored "-Wreserved-macro-identifier" // e.g. __HIP_TEXTURE_ATTRIB below
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#endif
+
+#if !defined(__HIPCC_RTC__)
+#include // NOTE(review): header name missing in this copy of the patch (text in angle brackets appears to have been stripped)
+#endif
+
+#if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+#include "texture_types.h" // NOTE(review): same file name as this header; a quoted include is searched next to this file first, so it may resolve to this header itself (the guard above would then make it a no-op) - confirm it reaches the intended NVIDIA header
+#elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+/*******************************************************************************
+ * AMD platform: texture constants, enums and descriptor structs               *
+ *                                                                             *
+ *                                                                             *
+ *******************************************************************************/
+#if !defined(__HIPCC_RTC__)
+#include // NOTE(review): header name missing in this copy of the patch
+#include // NOTE(review): header name missing in this copy of the patch
+#endif // !defined(__HIPCC_RTC__)
+
+#define hipTextureType1D 0x01
+#define hipTextureType2D 0x02
+#define hipTextureType3D 0x03
+#define hipTextureTypeCubemap 0x0C
+#define hipTextureType1DLayered 0xF1
+#define hipTextureType2DLayered 0xF2
+#define hipTextureTypeCubemapLayered 0xFC
+
+/**
+ * Should be the same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD
+ */
+#define HIP_IMAGE_OBJECT_SIZE_DWORD 12
+#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8
+#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD // i.e. 12; presumably the sampler follows the image object - confirm
+#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) // 12 + 8 = 20
+
+/**
+ * An opaque handle that represents a HIP texture object (struct __hip_texture is only declared here, never defined)
+ */
+struct __hip_texture;
+typedef struct __hip_texture* hipTextureObject_t;
+
+/**
+ * HIP texture address modes
+ */
+enum hipTextureAddressMode {
+  hipAddressModeWrap = 0,
+  hipAddressModeClamp = 1,
+  hipAddressModeMirror = 2,
+  hipAddressModeBorder = 3
+};
+
+/**
+ * HIP texture filter modes
+ */
+enum hipTextureFilterMode { hipFilterModePoint = 0, hipFilterModeLinear = 1 };
+
+/**
+ * HIP texture read modes
+ */
+enum hipTextureReadMode { hipReadModeElementType = 0, hipReadModeNormalizedFloat = 1 };
+
+/**
+ * HIP texture reference (plain struct; the C++ texture template below derives from it)
+ */
+typedef struct textureReference {
+  int normalized;
+  enum hipTextureReadMode readMode; // used only for driver APIs
+  enum hipTextureFilterMode filterMode;
+  enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
+  struct hipChannelFormatDesc channelDesc;
+  int sRGB; // Perform sRGB->linear conversion during texture read
+  unsigned int maxAnisotropy; // Limit to the anisotropy ratio
+  enum hipTextureFilterMode mipmapFilterMode;
+  float mipmapLevelBias;
+  float minMipmapLevelClamp;
+  float maxMipmapLevelClamp;
+
+  hipTextureObject_t textureObject;
+  int numChannels;
+  enum hipArray_Format format;
+} textureReference;
+
+/**
+ * HIP texture descriptor
+ */
+typedef struct hipTextureDesc {
+  enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
+  enum hipTextureFilterMode filterMode;
+  enum hipTextureReadMode readMode;
+  int sRGB; // Perform sRGB->linear conversion during texture read
+  float borderColor[4];
+  int normalizedCoords;
+  unsigned int maxAnisotropy;
+  enum hipTextureFilterMode mipmapFilterMode;
+  float mipmapLevelBias;
+  float minMipmapLevelClamp;
+  float maxMipmapLevelClamp;
+} hipTextureDesc;
+
+#if __cplusplus
+
+/*******************************************************************************
+ * C++ only: texture template wrapper around textureReference                  *
+ *                                                                             *
+ *                                                                             *
+ *******************************************************************************/
+#if __HIP__
+#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type))
+#else
+#define __HIP_TEXTURE_ATTRIB
+#endif
+
+typedef textureReference* hipTexRef;
+
+template // NOTE(review): template parameter list missing in this copy of the patch; `mode`, used by the constructors, is presumably one of the lost parameters
+struct __HIP_TEXTURE_ATTRIB texture : public textureReference {
+  texture(int norm = 0, enum hipTextureFilterMode fMode =
hipFilterModePoint,
+          enum hipTextureAddressMode aMode = hipAddressModeClamp) {
+    normalized = norm;
+    readMode = mode;
+    filterMode = fMode;
+    addressMode[0] = aMode;
+    addressMode[1] = aMode;
+    addressMode[2] = aMode;
+    channelDesc = hipCreateChannelDesc();
+    sRGB = 0;
+    textureObject = nullptr;
+    maxAnisotropy = 0;
+    mipmapLevelBias = 0;
+    minMipmapLevelClamp = 0;
+    maxMipmapLevelClamp = 0;
+  }
+
+  texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode,
+          struct hipChannelFormatDesc desc) {
+    normalized = norm;
+    readMode = mode;
+    filterMode = fMode;
+    addressMode[0] = aMode;
+    addressMode[1] = aMode;
+    addressMode[2] = aMode;
+    channelDesc = desc;
+    sRGB = 0;
+    textureObject = nullptr;
+    maxAnisotropy = 0;
+    mipmapLevelBias = 0;
+    minMipmapLevelClamp = 0;
+    maxMipmapLevelClamp = 0;
+  }
+};
+
+#endif /* __cplusplus */
+
+#else
+#error ("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+#endif
diff --git a/external/spdlog b/external/spdlog
new file mode 160000
index 0000000..472945b
--- /dev/null
+++ b/external/spdlog
@@ -0,0 +1 @@
+Subproject commit 472945ba489e3f5684761affc431ae532ab5ed8c
diff --git a/kernels/kernels.hip b/kernels/kernels.hip
new file mode 100644
index 0000000..73138f1
--- /dev/null
+++ b/kernels/kernels.hip
@@ -0,0 +1,14 @@
+#include // NOTE(review): header name missing in this copy of the patch; the kernel needs the HIP device declarations (__global__, blockIdx, ...)
+
+// SAXPY: y[i] += a * x[i] for every i in [0, n).
+// Each thread handles the one element at its global 1-D index; threads whose index is
+// >= n do nothing, so the launch grid may be rounded up past n.
+// extern "C" keeps the symbol unmangled so the host can look it up by the name "saxpy".
+extern "C" __global__
+void saxpy(float* y, const float* x, float a, int n) {
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+    if (i < n) {
+        y[i] += a * x[i];
+    }
+}
+
diff --git a/kernels/kernels_gfx1100.co b/kernels/kernels_gfx1100.co
new file mode 100644
index 0000000..4e284f0
Binary files /dev/null and b/kernels/kernels_gfx1100.co differ
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..5548ea9
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,110 @@
+#include // NOTE(review): the header names on these three #include lines are missing in this copy of the patch; main() needs the HIP runtime API, std::vector and std::cout/std::cerr
+#include
+#include
+
+// Demo driver: fills two host vectors, runs the precompiled `saxpy` kernel on them through
+// the HIP module API, and prints hy[0]. Returns 0 on success and 1 if any HIP call fails
+// (the failing call is reported on stderr).
+int main() {
+    // Element count. The saxpy kernel takes its length as `int`, so an `int` copy is kept
+    // for the argument list; host sizes and launch geometry use the unsigned value.
+    constexpr unsigned int elementCount = 1u << 20;
+    int N = static_cast<int>(elementCount);
+
+    // Reports a failed HIP call on stderr and yields false, so the `&&` chains below stop
+    // at the first error instead of continuing with handles that were never set up.
+    const auto check = [](hipError_t status, const char* what) {
+        if (status != hipSuccess) {
+            std::cerr << what << " failed: " << hipGetErrorString(status) << '\n';
+            return false;
+        }
+        return true;
+    };
+
+    // -----------------------------
+    // Host data
+    // -----------------------------
+    std::vector<float> hx(elementCount, 1.0f);
+    std::vector<float> hy(elementCount, 2.0f);
+    const auto bytes = hx.size() * sizeof(float);
+
+    // -----------------------------
+    // Device allocation
+    // -----------------------------
+    float* dx = nullptr;
+    float* dy = nullptr;
+
+    bool ok = check(hipInit(0), "hipInit")
+        && check(hipMalloc(&dx, bytes), "hipMalloc(dx)")
+        && check(hipMalloc(&dy, bytes), "hipMalloc(dy)")
+        && check(hipMemcpy(dx, hx.data(), bytes, hipMemcpyHostToDevice), "hipMemcpy(dx)")
+        && check(hipMemcpy(dy, hy.data(), bytes, hipMemcpyHostToDevice), "hipMemcpy(dy)");
+
+    // -----------------------------
+    // Load precompiled kernel
+    // -----------------------------
+    hipModule_t module{};
+    hipFunction_t kernel{};
+
+    // Tracked separately so cleanup only unloads a module that was actually loaded.
+    const bool moduleLoaded =
+        ok && check(hipModuleLoad(&module, "kernels/kernels_gfx1100.co"), "hipModuleLoad");
+    ok = moduleLoaded
+        && check(hipModuleGetFunction(&kernel, module, "saxpy"), "hipModuleGetFunction");
+
+    // -----------------------------
+    // Kernel args
+    // -----------------------------
+    // Order matches the kernel signature: saxpy(y, x, a, n).
+    float a = 3.0f;
+    void* args[] = {
+        &dy,
+        &dx,
+        &a,
+        &N
+    };
+
+    // -----------------------------
+    // Launch
+    // -----------------------------
+    constexpr unsigned int blockSize = 256;
+    constexpr unsigned int gridSize = (elementCount + blockSize - 1) / blockSize; // round up
+
+    ok = ok
+        && check(hipModuleLaunchKernel(
+                     kernel,
+                     gridSize, 1, 1,
+                     blockSize, 1, 1,
+                     0,
+                     nullptr,
+                     args,
+                     nullptr),
+                 "hipModuleLaunchKernel")
+        && check(hipDeviceSynchronize(), "hipDeviceSynchronize");
+
+    // -----------------------------
+    // Copy back
+    // -----------------------------
+    ok = ok && check(hipMemcpy(hy.data(), dy, bytes, hipMemcpyDeviceToHost), "hipMemcpy(hy)");
+
+    if (ok) {
+        std::cout << "hy[0] = " << hy[0] << std::endl; // flushed before device teardown below
+    }
+
+    // -----------------------------
+    // Cleanup
+    // -----------------------------
+    // Runs on every path; only resources that were actually acquired are released.
+    if (dx != nullptr) {
+        ok = check(hipFree(dx), "hipFree(dx)") && ok;
+    }
+    if (dy != nullptr) {
+        ok = check(hipFree(dy), "hipFree(dy)") && ok;
+    }
+    if (moduleLoaded) {
+        ok = check(hipModuleUnload(module), "hipModuleUnload") && ok;
+    }
+
+    return ok ? 0 : 1;
+}
+