gdb/testsuite/gdb.rocm/multi-GPU.cpp

   1 #include "hip/hip_runtime.h"
   2 #include "stdio.h"
   3 #include <iostream>
   4 #include <hip/hip_runtime.h>
   5 #include <hip/hip_runtime.h>
   6
   7 // Defining number of elements in Array
   8 #define N 5
   9
  10 #define MAX_GPU 8
  11
  12 #define HIPCHECK(cmd)                                                          \
  13 do {                                                                           \
  14     hipError_t error = (cmd);                                                  \
  15     if (error != hipSuccess) {                                                 \
  16         std::cerr << "Encountered HIP error (" << error << ") at line "        \
  17                   << __LINE__ << " in file " << __FILE__ << "\n";              \
  18         exit(-1);                                                              \
  19     }                                                                          \
  20 } while (0)
  21
  22 // Defining Kernel function for vector addition
  23 __global__ void gpu_kernel_add(int *d_a, int *d_b, int *d_c) {
  24     // Getting block index of current kernel
  25     //int tid = blockIdx.x; // handle the data at this index
  26     int tid = blockIdx.x * blockDim.x + threadIdx.x;
  27     printf("\n%d\n",tid);
  28     if (tid < N)
  29         d_c[tid] = d_a[tid] + d_b[tid];
  30 }
  31
  32 int main(void)
  33 {
  34     // Defining host arrays
  35     int h_a[N], h_b[N], h_c[N];
  36     // Defining device pointers
  37     int *d_a[N], *d_b[N], *d_c[N];
  38
  39     hipStream_t stream[MAX_GPU];
  40     int nGpu = 1;
  41     HIPCHECK(hipGetDeviceCount(&nGpu));
  42
  43     for (int i = 0; i < nGpu; i ++) {
  44         HIPCHECK(hipSetDevice(i));
  45         hipDeviceProp_t prop;
  46         HIPCHECK(hipGetDeviceProperties(&prop, i));
  47         printf("#   device %d [0x%02x] %s\n",
  48                         i, prop.pciBusID, prop.name);
  49         //create stream
  50         HIPCHECK(hipStreamCreate(&stream[i]));
  51
  52         hipMalloc((void**)&d_a[i], N * sizeof(int));
  53         hipMalloc((void**)&d_b[i], N * sizeof(int));
  54         hipMalloc((void**)&d_c[i], N * sizeof(int));
  55
  56         // Initializing Arrays
  57         for (int i = 0; i < N; i++) {
  58             h_a[i] = 2*i;
  59             h_b[i] = i ;
  60         }
  61
  62         // Copy input arrays from host to device memory
  63         hipMemcpyAsync(d_a[i], h_a, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
  64         hipMemcpyAsync(d_b[i], h_b, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
  65     }
  66
  67     for (int i = 0; i < nGpu; i ++) {
  68         HIPCHECK(hipSetDevice(i));
  69         // Calling kernels with N blocks and one thread per block, passing
  70         // device pointers as parameters
  71         hipLaunchKernelGGL(gpu_kernel_add, dim3(N), dim3(1 ), 0, stream[i], d_a[i], d_b[i], d_c[i]);
  72     }
  73
  74     for (int i = 0; i < nGpu; i ++) {
  75         HIPCHECK(hipSetDevice(i));
  76
  77         // Copy result back to host memory from device memory
  78         hipMemcpyAsync(h_c, d_c[i], N * sizeof(int), hipMemcpyDeviceToHost, stream[i]);
  79         HIPCHECK(hipStreamSynchronize(stream[i]));
  80         printf("Vector addition on GPU \n");
  81
  82         // Printing result on console
  83         for (int i = 0; i < N; i++) {
  84             printf("Operation result of %d element is %d + %d = %d\n",
  85                 i, h_a[i], h_b[i],h_c[i]);
  86         }
  87
  88         // Free up memory
  89         HIPCHECK(hipStreamDestroy(stream[i]));
  90         hipFree(d_a[i]);
  91         hipFree(d_b[i]);
  92         hipFree(d_c[i]);
  93     }
  94     return 0;
  95 }
  96