[deliverable/binutils-gdb.git] / gdb / testsuite / gdb.rocm / multi-GPU.cpp

/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "hip/hip_runtime.h"
#include "stdio.h"
#include <iostream>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime.h>

// Defining number of elements in Array
#define N 5

#define MAX_GPU 8

// Evaluates the HIP runtime call `cmd` exactly once.  If it does not return
// hipSuccess, reports the numeric error code, its description, and the call
// site on stderr, then terminates the process with exit(-1).
// The temporary uses a reserved-looking name so that a `cmd` expression which
// mentions a caller variable called `error` is not captured by the macro.
#define HIPCHECK(cmd)                                                          \
do {                                                                           \
    hipError_t hipcheck_err_ = (cmd);                                          \
    if (hipcheck_err_ != hipSuccess) {                                         \
        std::cerr << "Encountered HIP error (" << hipcheck_err_ << ": "        \
                  << hipGetErrorString(hipcheck_err_) << ") at line "          \
                  << __LINE__ << " in file " << __FILE__ << "\n";              \
        exit(-1);                                                              \
    }                                                                          \
} while (0)

// Vector-addition kernel: each thread whose flat index is below N writes
// d_c[index] = d_a[index] + d_b[index].
// Every thread also prints its flat index, including threads whose index
// is >= N (those do nothing else).
__global__ void gpu_kernel_add(int *d_a, int *d_b, int *d_c) {
    // Flat global index built from the block and thread coordinates.
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    printf("\n%d\n", idx);

    // Threads past the end of the arrays have nothing to add.
    if (idx >= N)
        return;

    d_c[idx] = d_a[idx] + d_b[idx];
}

int main(void)
{
    // Defining host arrays
    int h_a[N], h_b[N], h_c[N];
    // Defining device pointers
    int *d_a[N], *d_b[N], *d_c[N];

    hipStream_t stream[MAX_GPU];
    int nGpu = 1;
    HIPCHECK(hipGetDeviceCount(&nGpu));

    for (int i = 0; i < nGpu; i ++) {
        HIPCHECK(hipSetDevice(i));
        hipDeviceProp_t prop;
        HIPCHECK(hipGetDeviceProperties(&prop, i));
        printf("#   device %d [0x%02x] %s\n",
                        i, prop.pciBusID, prop.name);
        //create stream
        HIPCHECK(hipStreamCreate(&stream[i]));

        hipMalloc((void**)&d_a[i], N * sizeof(int));
        hipMalloc((void**)&d_b[i], N * sizeof(int));
        hipMalloc((void**)&d_c[i], N * sizeof(int));

        // Initializing Arrays
        for (int i = 0; i < N; i++) {
            h_a[i] = 2*i;
            h_b[i] = i ;
        }

        // Copy input arrays from host to device memory
        hipMemcpyAsync(d_a[i], h_a, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
        hipMemcpyAsync(d_b[i], h_b, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
    }

    for (int i = 0; i < nGpu; i ++) {
        HIPCHECK(hipSetDevice(i));
        // Calling kernels with N blocks and one thread per block, passing
        // device pointers as parameters
        hipLaunchKernelGGL(gpu_kernel_add, dim3(N), dim3(1 ), 0, stream[i], d_a[i], d_b[i], d_c[i]);
    }

    for (int i = 0; i < nGpu; i ++) {
        HIPCHECK(hipSetDevice(i));

        // Copy result back to host memory from device memory
        hipMemcpyAsync(h_c, d_c[i], N * sizeof(int), hipMemcpyDeviceToHost, stream[i]);
        HIPCHECK(hipStreamSynchronize(stream[i]));
        printf("Vector addition on GPU \n");

        // Printing result on console
        for (int i = 0; i < N; i++) {
            printf("Operation result of %d element is %d + %d = %d\n",
                i, h_a[i], h_b[i],h_c[i]);
        }

        // Free up memory
        HIPCHECK(hipStreamDestroy(stream[i]));
        hipFree(d_a[i]);
        hipFree(d_b[i]);
        hipFree(d_c[i]);
    }
    return 0;
}
Commit	Line	Data
ec980634	1	/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
	2	Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
	3
	4	This file is part of GDB.
	5
	6	This program is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 3 of the License, or
	9	(at your option) any later version.
	10
	11	This program is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
	17	along with this program. If not, see <http://www.gnu.org/licenses/>. */
	18
820c00e6 QS	19	#include "hip/hip_runtime.h"
	20	#include "stdio.h"
	21	#include <iostream>
	22	#include <hip/hip_runtime.h>
	23	#include <hip/hip_runtime.h>
	24
	25	// Defining number of elements in Array
	26	#define N 5
	27
	28	#define MAX_GPU 8
	29
	30	#define HIPCHECK(cmd) \
	31	do { \
	32	hipError_t error = (cmd); \
	33	if (error != hipSuccess) { \
	34	std::cerr << "Encountered HIP error (" << error << ") at line " \
	35	<< __LINE__ << " in file " << __FILE__ << "\n"; \
	36	exit(-1); \
	37	} \
	38	} while (0)
	39
	40	// Defining Kernel function for vector addition
	41	__global__ void gpu_kernel_add(int *d_a, int *d_b, int *d_c) {
	42	// Getting block index of current kernel
	43	//int tid = blockIdx.x; // handle the data at this index
	44	int tid = blockIdx.x * blockDim.x + threadIdx.x;
	45	printf("\n%d\n",tid);
	46	if (tid < N)
	47	d_c[tid] = d_a[tid] + d_b[tid];
	48	}
	49
	50	int main(void)
	51	{
	52	// Defining host arrays
	53	int h_a[N], h_b[N], h_c[N];
	54	// Defining device pointers
	55	int *d_a[N], *d_b[N], *d_c[N];
	56
	57	hipStream_t stream[MAX_GPU];
	58	int nGpu = 1;
	59	HIPCHECK(hipGetDeviceCount(&nGpu));
	60
	61	for (int i = 0; i < nGpu; i ++) {
	62	HIPCHECK(hipSetDevice(i));
	63	hipDeviceProp_t prop;
	64	HIPCHECK(hipGetDeviceProperties(&prop, i));
	65	printf("# device %d [0x%02x] %s\n",
	66	i, prop.pciBusID, prop.name);
	67	//create stream
	68	HIPCHECK(hipStreamCreate(&stream[i]));
	69
	70	hipMalloc((void**)&d_a[i], N * sizeof(int));
	71	hipMalloc((void**)&d_b[i], N * sizeof(int));
	72	hipMalloc((void**)&d_c[i], N * sizeof(int));
	73
	74	// Initializing Arrays
	75	for (int i = 0; i < N; i++) {
	76	h_a[i] = 2*i;
	77	h_b[i] = i ;
	78	}
	79
	80	// Copy input arrays from host to device memory
	81	hipMemcpyAsync(d_a[i], h_a, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
	82	hipMemcpyAsync(d_b[i], h_b, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
83	}
84
85	for (int i = 0; i < nGpu; i ++) {
86	HIPCHECK(hipSetDevice(i));
87	// Calling kernels with N blocks and one thread per block, passing
88	// device pointers as parameters
89	hipLaunchKernelGGL(gpu_kernel_add, dim3(N), dim3(1 ), 0, stream[i], d_a[i], d_b[i], d_c[i]);
90	}
91
92	for (int i = 0; i < nGpu; i ++) {
93	HIPCHECK(hipSetDevice(i));
94
95	// Copy result back to host memory from device memory
96	hipMemcpyAsync(h_c, d_c[i], N * sizeof(int), hipMemcpyDeviceToHost, stream[i]);
97	HIPCHECK(hipStreamSynchronize(stream[i]));
98	printf("Vector addition on GPU \n");
99
100	// Printing result on console
101	for (int i = 0; i < N; i++) {
102	printf("Operation result of %d element is %d + %d = %d\n",
103	i, h_a[i], h_b[i],h_c[i]);
104	}
105
106	// Free up memory
107	HIPCHECK(hipStreamDestroy(stream[i]));
108	hipFree(d_a[i]);
109	hipFree(d_b[i]);
110	hipFree(d_c[i]);
111	}
112	return 0;
113	}
114