--- /dev/null
+#include "hip/hip_runtime.h"
+#include "stdio.h"
+#include <iostream>
+#include <hip/hip_runtime.h>
+#include <hip/hip_runtime.h>
+
+// Defining number of elements in Array
+#define N 64
+#define NO_OF_BLOCKS 4
+#define NO_OF_THREADS 256
+
+
+
+#define HIPCHECK(cmd) \
+do { \
+ hipError_t error = (cmd); \
+ if (error != hipSuccess) \
+ { \
+ std::cerr << "Encountered HIP error (" << error << ") at line " \
+ << __LINE__ << " in file " << __FILE__ << "\n"; \
+ exit(-1); \
+ } \
+} while (0)
+
+#define MAX_GPU 8
+
+
+
+// Defining Kernel function for vector addition
+__global__ void VectorAdd(int *d_a, int *d_b, int *d_c)
+{
+ // Getting block index of current kernel
+ int tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+ if (tid < N)
+ d_c[tid] = d_a[tid] + d_b[tid];
+}
+
+
+int main(void)
+{
+ // Defining host arrays
+ int h_a[N], h_b[N], h_c[N];
+ // Defining device pointers
+ int *d_a[N], *d_b[N], *d_c[N];
+ // allocate the memory
+
+ hipStream_t stream[MAX_GPU];
+
+ int nGpu = 1;
+ HIPCHECK(hipGetDeviceCount(&nGpu));
+ for (int i = 0; i < nGpu; i ++) {
+ HIPCHECK(hipSetDevice(i));
+ hipDeviceProp_t prop;
+ HIPCHECK(hipGetDeviceProperties(&prop, i));
+ printf("# device %d [0x%02x] %s\n",
+ i, prop.pciBusID, prop.name);
+ //create stream
+ HIPCHECK(hipStreamCreate(&stream[i]));
+
+ hipMalloc((void**)&d_a[i], N * sizeof(int));
+ hipMalloc((void**)&d_b[i], N * sizeof(int));
+ hipMalloc((void**)&d_c[i], N * sizeof(int));
+ // Initializing Arrays
+ for (int i = 0; i < N; i++) {
+ h_a[i] = 2*i;
+ h_b[i] = i ;
+ }
+
+ // Copy input arrays from host to device memory
+ hipMemcpyAsync(d_a[i], h_a, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
+ hipMemcpyAsync(d_b[i], h_b, N * sizeof(int), hipMemcpyHostToDevice, stream[i]);
+ }
+
+ for (int i = 0; i < nGpu; i ++) {
+ HIPCHECK(hipSetDevice(i));
+
+ // Calling VectorAdd kernels with NO_OF_BLOCKS and NO_OF_THREADS per block, passing
+ // device pointers as parameters
+ hipLaunchKernelGGL(VectorAdd, dim3(NO_OF_BLOCKS), dim3(NO_OF_THREADS), 0, stream[i], d_a[i], d_b[i], d_c[i]);
+ }
+
+ for (int i = 0; i < nGpu; i ++) {
+ HIPCHECK(hipSetDevice(i));
+ // Copy result back to host memory from device memory
+ hipMemcpyAsync(h_c, d_c[i], N * sizeof(int), hipMemcpyDeviceToHost, stream[i]);
+ HIPCHECK(hipStreamSynchronize(stream[i]));
+ //printf("Vector addition on GPU \n");
+ // Printing result on console
+ for (int i = 0; i < N; i++) {
+ /*printf("Operation result of %d element is %d + %d = %d\n",
+ i, h_a[i], h_b[i],h_c[i]);*/
+ if(h_a[i]+h_b[i] !=h_c[i]) {
+ HIPCHECK(hipErrorUnknown);
+ }
+ }
+ // Free up memory
+ HIPCHECK(hipStreamDestroy(stream[i]));
+ hipFree(d_a[i]);
+ hipFree(d_b[i]);
+ hipFree(d_c[i]);
+ }
+ return 0;
+}
--- /dev/null
+# Copyright (C) 2019-2020 Free Software Foundation, Inc.
+# Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#exp_internal 1
+
+load_lib rocm.exp
+
+set testfile "nonstop-mode"
+set srcfile ${srcdir}/${subdir}/${testfile}.cpp
+set objfile [standard_output_file ${testfile}.o]
+set binfile [standard_output_file ${testfile}]
+
+set wave_id {(0,0,0)/0}
+set rocm_threadno {1.5}
+set threadid {16}
+
+# Check if skip hip tests
+if [skip_hipcc_tests] {
+ verbose "Skipping hip test: ${testfile}."
+ return 0
+}
+
+# Compile the hip program
+if {[prepare_for_testing "failed to prepare ${testfile}" $testfile $srcfile {debug hip}]} {
+ return -1
+}
+
+gdb_start
+
+# Load the hip program
+if {[gdb_load ${binfile}] == -1} {
+ verbose "failed to load program ${testfile}."
+ return -1
+}
+
+# Run to main and break
+if ![runto_main] {
+ fail "can't run to main and break in program ${testfile}."
+ return -1
+}
+
+
+#Code to fetch threadno and corresponding wave_id
+#Set breakpoing in device code
+gdb_breakpoint "VectorAdd" "allow-pending"
+gdb_test "continue" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+send_gdb "info threads\n"
+gdb_expect -re "\\s+(\\d+)\\s+AMDGPU Thread\\s+(\\d+\.\\d+)\\s+(\\(\\d+,\\d+,\\d+\\)/\\d+).*$gdb_prompt $" {
+ set threadid "$expect_out(1,string)"
+ set rocm_threadno "$expect_out(2,string)"
+ set wave_id "$expect_out(3,string)"
+}
+
+verbose $rocm_threadno
+verbose $threadid
+verbose $wave_id
+regsub -all {[]*+.|()^$\[\\]} $wave_id {\\&} wave_id_re
+verbose $wave_id_re
+
+clean_restart ${binfile}
+
+#Turn on non-stop mode
+gdb_test_no_output "set non-stop on"
+gdb_breakpoint "VectorAdd" "allow-pending"
+send_gdb "run\n"
+set test_name "run"
+gdb_expect 60 {
+ -re "hit Breakpoint \[1-9\].*$gdb_prompt $" {
+ #print "$expect_out(buffer)"
+ pass "run" }
+ timeout {fail "(timeout) run"}
+}
+#gdb_test "run" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+
+#in non-stop mode when we do run after puting breakpoint in kernel all threads gets created
+#so we need to wait until all thread info get display on gdb console.
+sleep 5
+
+gdb_test "thread $threadid" ".Switching to thread $threadid.*AMDGPU Thread.*"
+gdb_test "continue -a" {.+Inferior\s[\d].+\sexited\snormally.+}
+send_gdb "run\n"
+set test_name "run"
+gdb_expect 60 {
+ -re "hit Breakpoint \[1-9\].*$gdb_prompt $" {
+ #print "$expect_out(buffer)"
+ pass "run" }
+ timeout {fail "(timeout) run"}
+}
+#gdb_test "run" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+sleep 5
+gdb_test "thread $threadid" ".Switching to thread $threadid.*AMDGPU Thread.*"
+
+
+# Check info agents
+# vega10 sample output "1 43:00.0 vega10 4 56 4 10"
+# vega20 sample output "1 b1:00.0 vega20 4 60 4 10"
+gdb_test_sequence "info agents" "info agents" {
+ {Id\s+PCI Slot\s+Device Name\s+Shader Engines\s+Compute Units\s+SIMD/CU\s+Wavefronts/SIMD}
+ {\d\s+\d+:\d+\.\d\s+\w+\d+\s+\d+\s+\d+\s+\d+\s+\d}
+}
+
+
+#Check info threads
+#sample output
+#* 5 AMDGPU Thread 1.1 (0,0,0)/0 "bit_extract_kernel" bit_extract_kernel () at bit_extract.cpp:38
+# 6 AMDGPU Thread 1.2 (0,0,0)/1 "bit_extract_kernel" __hip_get_block_dim_x ()
+gdb_test_sequence "info threads" "info threads" {
+ {\sId\s+Target\s+Id\s+Frame}
+ {.+\s+\d+\s+AMDGPU\sThread\s\d+\.\d+\s.*}
+}
+
+sleep 5
+
+
+#Show architecture info while debugging in device code
+#Sample output "The target architecture is set automatically (currently amdgcn:gfx906)"
+gdb_test_sequence "show architecture" "show architecture" {
+ {The target architecture is set automatically\s\(currently amdgcn:gfx\d+\)}
+}
+
+
+gdb_test_sequence "show convenience" "show convenience" {
+ {.+\$_thread = \d+.+}
+ {\$_wave_id = \"\(\d+,\d+,\d+\)/\d+\"}
+
+}
+
+
+#info sharedlibrary
+#sample output
+#From To Syms Read Shared Object Library
+#0x00007ffbdfe05000 0x00007ffbdfe07a2c Yes (*) AMDGPU shared object [loaded from memory 0xab9900..0xac3470]
+#0x00007ffbdc201000 0x00007ffbdc201c94 Yes AMDGPU shared object [loaded from memory 0x9b71d0..0x9bae28]
+gdb_test_sequence "info sharedlibrary" "info sharedlibrary" {
+ {From\s+To\s+Syms\s+Read\s+Shared Object Library}
+ {0x[0-9a-fA-F]+\s+0x[0-9a-fA-F]+\s+Yes\s\(\*\)\s+AMDGPU shared object.}
+ {0x[0-9a-fA-F]+\s+0x[0-9a-fA-F]+\s+Yes\s+AMDGPU shared object.}
+}
+
+
+#info break
+#sample output
+#Num Type Disp Enb Address What
+#1 breakpoint keep y 0x0000000000400d49 in main(int, char**) at bit_extract.cpp:54
+# breakpoint already hit 1 time
+#2 breakpoint keep y 0x00007ffbdc2012dc in bit_extract_kernel() at bit_extract.cpp:38
+# breakpoint already hit 1 time
+gdb_test_sequence "info break" "info break" {
+ {Num\s+Type\s+Disp\sEnb Address\s+What}
+ {\d+\s+breakpoint\s+keep\s+y}
+ {breakpoint already hit\s\d+\stime}
+}
+
+
+#info inferiors
+# Num Description Executable
+#* 1 process 34544 /home/amd/rohit/samples/0_Intro/bit_extract/bit_extrac
+gdb_test_sequence "info inferiors" "info inferiors" {
+ {\s+Num\s+Description\s+Executable}
+ {\*\s\d+\s+process\s+\d+}
+}
+
+gdb_test "disable 1"
+gdb_test "continue -a" {.+Inferior\s\d+.+\sexited\snormally.+}
+gdb_test "run" {.+Inferior\s[\d].+\sexited\snormally.+}
+gdb_test "enable 1"
+send_gdb "run\n"
+set test_name "run"
+gdb_expect 60 {
+ -re "hit Breakpoint \[1-9\].*$gdb_prompt $" {
+ #print "$expect_out(buffer)"
+ pass "run" }
+ timeout {fail "(timeout) run"}
+}
+#gdb_test "run" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+
+sleep 5
+gdb_test "continue -a" {.+Inferior\s\d+.+\sexited\snormally.+}
+gdb_test "clear VectorAdd"
+gdb_test "run" {.+Inferior\s[\d].+\sexited\snormally.+}
+
+gdb_breakpoint "VectorAdd" "allow-pending"
+send_gdb "run\n"
+set test_name "run"
+gdb_expect 60 {
+ -re "hit Breakpoint \[1-9\].*$gdb_prompt $" {
+ #print "$expect_out(buffer)"
+ pass "run" }
+ timeout {fail "(timeout) run"}
+}
+#gdb_test "run" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+
+sleep 5
+
+gdb_test "continue -a" {.+Inferior\s[\d].+\sexited\snormally.+}
+
+
+#Switch to all stop mode
+gdb_test_no_output "set non-stop off"
+gdb_test "run" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+gdb_test "continue" {.+hit\sBreakpoint\s\d+.+\sVectorAdd\s\(.*\)\sat.*}
+gdb_test "disable 2"
+gdb_test "continue" {.+Inferior\s[\d].+\sexited\snormally.+}
+gdb_test "run" {.+Inferior\s[\d].+\sexited\snormally.+}
+gdb_test "enable 2"
+gdb_test "run" {.+hit\sBreakpoint\s[\d].+\sVectorAdd\s\(.*\)\sat.*}
+gdb_test "clear VectorAdd"
+gdb_test "continue" {.+Inferior\s[\d].+\sexited\snormally.+}
+
+gdb_exit
+
+
+
+
+
+