fc4807a6f0539c5e16a83e8dca5c5052885b5946
[deliverable/binutils-gdb.git] / gdb / testsuite / gdb.rocm / multi-GPU.cpp
1 #include "hip/hip_runtime.h"
2 #include "stdio.h"
3 #include <iostream>
4 #include <hip/hip_runtime.h>
5 #include <hip/hip_runtime.h>
6
7 // Defining number of elements in Array
8 #define N 5
9
10 #define MAX_GPU 8
11
// Evaluates the HIP runtime call `cmd` exactly once. If the returned status
// is anything other than hipSuccess, prints the numeric code, its textual
// description, and the source location to stderr, then terminates the
// process with exit(-1).
// The temporary uses a deliberately unusual name so that an argument
// expression mentioning a caller variable called `error` is not captured by
// the macro's own local.
#define HIPCHECK(cmd)                                                       \
  do {                                                                      \
    hipError_t hipcheck_status_ = (cmd);                                    \
    if (hipcheck_status_ != hipSuccess) {                                   \
      std::cerr << "Encountered HIP error (" << hipcheck_status_ << ": "    \
                << hipGetErrorString(hipcheck_status_) << ") at line "      \
                << __LINE__ << " in file " << __FILE__ << "\n";             \
      exit(-1);                                                             \
    }                                                                       \
  } while (0)
21
22 // Defining Kernel function for vector addition
23 __global__ void gpu_kernel_add(int *d_a, int *d_b, int *d_c) {
24 // Getting block index of current kernel
25 //int tid = blockIdx.x; // handle the data at this index
26 int tid = blockIdx.x * blockDim.x + threadIdx.x;
27 printf("\n%d\n",tid);
28 if (tid < N)
29 d_c[tid] = d_a[tid] + d_b[tid];
30 }
31
32 int main(void)
33 {
34 // Defining host arrays
35 int h_a[N], h_b[N], h_c[N];
36 // Defining device pointers
37 int *d_a[N], *d_b[N], *d_c[N];
38
39 hipStream_t stream[MAX_GPU];
40 int nGpu = 1;
41 HIPCHECK(hipGetDeviceCount(&nGpu));
42
43 for (int i = 0; i < nGpu; i ++) {
44 HIPCHECK(hipSetDevice(i));
45 hipDeviceProp_t prop;
46 HIPCHECK(hipGetDeviceProperties(&prop, i));
47 printf("# device %d [0x%02x] %s\n",
48 i, prop.pciBusID, prop.name);
49 //create stream
50 HIPCHECK(hipStreamCreate(&stream[i]));
51
52 hipMalloc((void**)&d_a[i], N * sizeof(int));
53 hipMalloc((void**)&d_b[i], N * sizeof(int));
54 hipMalloc((void**)&d_c[i], N * sizeof(int));
55
56 // Initializing Arrays
57 for (int i = 0; i < N; i++) {
58 h_a[i] = 2*i;
59 h_b[i] = i ;
60 }
61
62 // Copy input arrays from host to device memory
63 hipMemcpyAsync(d_a[i], h_a, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
64 hipMemcpyAsync(d_b[i], h_b, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
65 }
66
67 for (int i = 0; i < nGpu; i ++) {
68 HIPCHECK(hipSetDevice(i));
69 // Calling kernels with N blocks and one thread per block, passing
70 // device pointers as parameters
71 hipLaunchKernelGGL(gpu_kernel_add, dim3(N), dim3(1 ), 0, stream[i], d_a[i], d_b[i], d_c[i]);
72 }
73
74 for (int i = 0; i < nGpu; i ++) {
75 HIPCHECK(hipSetDevice(i));
76
77 // Copy result back to host memory from device memory
78 hipMemcpyAsync(h_c, d_c[i], N * sizeof(int), hipMemcpyDeviceToHost, stream[i]);
79 HIPCHECK(hipStreamSynchronize(stream[i]));
80 printf("Vector addition on GPU \n");
81
82 // Printing result on console
83 for (int i = 0; i < N; i++) {
84 printf("Operation result of %d element is %d + %d = %d\n",
85 i, h_a[i], h_b[i],h_c[i]);
86 }
87
88 // Free up memory
89 HIPCHECK(hipStreamDestroy(stream[i]));
90 hipFree(d_a[i]);
91 hipFree(d_b[i]);
92 hipFree(d_c[i]);
93 }
94 return 0;
95 }
96
This page took 0.030977 seconds and 3 git commands to generate.