Should work if we compile it together on the AMD machine in the cloud.

This commit is contained in:
2026-01-27 14:28:10 -06:00
commit 536b475d5b
65 changed files with 26551 additions and 0 deletions

10
kernels/kernels.hip Normal file
View File

@@ -0,0 +1,10 @@
#include <hip/hip_runtime.h>
/*
 * saxpy: single-precision "a*x plus y", in place.
 *
 * Performs y[i] += a * x[i] for every i in [0, n).
 *
 *   y  in/out buffer, updated in place; must hold at least n floats
 *   x  input buffer, only read; must hold at least n floats
 *   a  scalar multiplier applied to each x[i]
 *   n  number of elements to process; n <= 0 leaves y untouched
 *
 * Launch layout: 1-D. Only the .x components of gridDim/blockDim/blockIdx/
 * threadIdx are consulted. Any grid size is valid: each thread starts at its
 * global index and then advances by the total number of launched threads, so
 * a grid smaller than n still covers every element, and a grid larger than n
 * simply leaves the surplus threads idle.
 *
 * extern "C" keeps the symbol name unmangled ("saxpy").
 * NOTE(review): presumably so the kernel can be looked up by name from a
 * compiled code object -- confirm against the host-side loader.
 */
extern "C" __global__
void saxpy(float* y, const float* x, float a, int n) {
    // Index math is done in signed 64-bit: the 32-bit product
    // blockIdx.x * blockDim.x can wrap for very large launches, and keeping
    // the type signed means a negative n compares correctly (no iterations).
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += stride) {
        y[i] += a * x[i];
    }
}