fc4807a6f0539c5e16a83e8dca5c5052885b5946
1 #include "hip/hip_runtime.h"
4 #include <hip/hip_runtime.h>
5 #include <hip/hip_runtime.h>
7 // Defining number of elements in Array
12 #define HIPCHECK(cmd) \
14 hipError_t error = (cmd); \
15 if (error != hipSuccess) { \
16 std::cerr << "Encountered HIP error (" << error << ") at line " \
17 << __LINE__ << " in file " << __FILE__ << "\n"; \
22 // Defining Kernel function for vector addition
23 __global__
void gpu_kernel_add(int *d_a
, int *d_b
, int *d_c
) {
24 // Getting block index of current kernel
25 //int tid = blockIdx.x; // handle the data at this index
26 int tid
= blockIdx
.x
* blockDim
.x
+ threadIdx
.x
;
29 d_c
[tid
] = d_a
[tid
] + d_b
[tid
];
34 // Defining host arrays
35 int h_a
[N
], h_b
[N
], h_c
[N
];
36 // Defining device pointers
37 int *d_a
[N
], *d_b
[N
], *d_c
[N
];
39 hipStream_t stream
[MAX_GPU
];
41 HIPCHECK(hipGetDeviceCount(&nGpu
));
43 for (int i
= 0; i
< nGpu
; i
++) {
44 HIPCHECK(hipSetDevice(i
));
46 HIPCHECK(hipGetDeviceProperties(&prop
, i
));
47 printf("# device %d [0x%02x] %s\n",
48 i
, prop
.pciBusID
, prop
.name
);
50 HIPCHECK(hipStreamCreate(&stream
[i
]));
52 hipMalloc((void**)&d_a
[i
], N
* sizeof(int));
53 hipMalloc((void**)&d_b
[i
], N
* sizeof(int));
54 hipMalloc((void**)&d_c
[i
], N
* sizeof(int));
56 // Initializing Arrays
57 for (int i
= 0; i
< N
; i
++) {
62 // Copy input arrays from host to device memory
63 hipMemcpyAsync(d_a
[i
], h_a
, N
* sizeof(int), hipMemcpyHostToDevice
, stream
[i
]);
64 hipMemcpyAsync(d_b
[i
], h_b
, N
* sizeof(int), hipMemcpyHostToDevice
, stream
[i
]);
67 for (int i
= 0; i
< nGpu
; i
++) {
68 HIPCHECK(hipSetDevice(i
));
69 // Calling kernels with N blocks and one thread per block, passing
70 // device pointers as parameters
71 hipLaunchKernelGGL(gpu_kernel_add
, dim3(N
), dim3(1 ), 0, stream
[i
], d_a
[i
], d_b
[i
], d_c
[i
]);
74 for (int i
= 0; i
< nGpu
; i
++) {
75 HIPCHECK(hipSetDevice(i
));
77 // Copy result back to host memory from device memory
78 hipMemcpyAsync(h_c
, d_c
[i
], N
* sizeof(int), hipMemcpyDeviceToHost
, stream
[i
]);
79 HIPCHECK(hipStreamSynchronize(stream
[i
]));
80 printf("Vector addition on GPU \n");
82 // Printing result on console
83 for (int i
= 0; i
< N
; i
++) {
84 printf("Operation result of %d element is %d + %d = %d\n",
85 i
, h_a
[i
], h_b
[i
],h_c
[i
]);
89 HIPCHECK(hipStreamDestroy(stream
[i
]));
This page took 0.030977 seconds and 3 git commands to generate.