should work if we compile it together on the AMD machin in the cloud
This commit is contained in:
162
external/hip/hip_ext.h
vendored
Normal file
162
external/hip/hip_ext.h
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
#define HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
#include "hip/hip_runtime.h"
|
||||
#if defined(__cplusplus)
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#endif
|
||||
/** @addtogroup Execution Execution Control
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Launches kernel with parameters and shared memory on stream with arguments passed
|
||||
* to kernel params or extra arguments.
|
||||
*
|
||||
* @param [in] f Kernel to launch.
|
||||
* @param [in] globalWorkSizeX X grid dimension specified in work-items.
|
||||
* @param [in] globalWorkSizeY Y grid dimension specified in work-items.
|
||||
* @param [in] globalWorkSizeZ Z grid dimension specified in work-items.
|
||||
* @param [in] localWorkSizeX X block dimension specified in work-items.
|
||||
* @param [in] localWorkSizeY Y block dimension specified in work-items.
|
||||
* @param [in] localWorkSizeZ Z block dimension specified in work-items.
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] hStream Stream where the kernel should be dispatched.
|
||||
* May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams pointer to kernel parameters.
|
||||
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and
|
||||
* must be in the memory layout and alignment expected by the kernel.
|
||||
* All passed arguments must be naturally aligned according to their type. The memory address of
|
||||
* each argument should be a multiple of its size in bytes. Please refer to
|
||||
* hip_porting_driver_api.md for sample usage.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be
|
||||
* launched in any order.
|
||||
* @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue.
|
||||
*
|
||||
* HIP/ROCm actually updates the start event when the associated kernel completes.
|
||||
* Currently, timing between startEvent and stopEvent does not include the time it takes to perform
|
||||
* a system scope release/cache flush - only the time it takes to issues writes to cache.
|
||||
*
|
||||
* @note For this HIP API, the flag 'hipExtAnyOrderLaunch' is not supported on AMD GFX9xx boards.
|
||||
*
|
||||
*/
|
||||
HIP_PUBLIC_API
|
||||
extern "C" hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams,
|
||||
void** extra, hipEvent_t startEvent __dparm(NULL),
|
||||
hipEvent_t stopEvent __dparm(NULL),
|
||||
uint32_t flags __dparm(0));
|
||||
/**
|
||||
* @brief This HIP API is deprecated, please use hipExtModuleLaunchKernel() instead.
|
||||
*
|
||||
*/
|
||||
HIP_DEPRECATED("use hipExtModuleLaunchKernel instead")
|
||||
HIP_PUBLIC_API
|
||||
extern "C" hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams,
|
||||
void** extra, hipEvent_t startEvent __dparm(NULL),
|
||||
hipEvent_t stopEvent __dparm(NULL));
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
/**
|
||||
* @brief Launches kernel from the pointer address, with arguments and shared memory on stream.
|
||||
*
|
||||
* @param [in] function_address pointer to the Kernel to launch.
|
||||
* @param [in] numBlocks number of blocks.
|
||||
* @param [in] dimBlocks dimension of a block.
|
||||
* @param [in] args pointer to kernel arguments.
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched.
|
||||
* May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be
|
||||
* launched in any order.
|
||||
* @returns #hipSuccess, #hipInvalidDeviceId, #hipErrorNotInitialized, #hipErrorInvalidValue.
|
||||
*
|
||||
*/
|
||||
extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks,
|
||||
dim3 dimBlocks, void** args, size_t sharedMemBytes,
|
||||
hipStream_t stream, hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent, int flags);
|
||||
|
||||
/**
|
||||
* @brief Launches kernel with dimention parameters and shared memory on stream with templated
|
||||
* kernel and arguments.
|
||||
*
|
||||
* @param [in] kernel Kernel to launch.
|
||||
* @param [in] numBlocks const number of blocks.
|
||||
* @param [in] dimBlocks const dimension of a block.
|
||||
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel.
|
||||
* HIP-Clang compiler provides support for extern shared declarations.
|
||||
* @param [in] stream Stream where the kernel should be dispatched.
|
||||
* May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of
|
||||
* the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] flags The value of hipExtAnyOrderLaunch, signifies if kernel can be
|
||||
* launched in any order.
|
||||
* @param [in] args templated kernel arguments.
|
||||
*
|
||||
*/
|
||||
template <typename... Args, typename F = void (*)(Args...)>
|
||||
inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
||||
Args... args) {
|
||||
constexpr size_t count = sizeof...(Args);
|
||||
auto tup_ = std::tuple<Args...>{args...};
|
||||
auto tup = validateArgsCountType(kernel, tup_);
|
||||
void* _Args[count];
|
||||
pArgs<0>(tup, _Args);
|
||||
|
||||
auto k = reinterpret_cast<void*>(kernel);
|
||||
hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent, stopEvent,
|
||||
(int)flags);
|
||||
}
|
||||
|
||||
#endif // defined(__cplusplus)
|
||||
|
||||
// doxygen end AMD-specific features
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif // #iidef HIP_INCLUDE_HIP_HIP_EXT_H
|
||||
Reference in New Issue
Block a user