From f9ee990406b9a991be755cc4bf111df90bc8c682 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Sat, 15 Jan 2022 10:55:31 -0500 Subject: [PATCH] gdbserver: report correct status in thread stop race condition The test introduced by the following patch would sometimes fail in this configuration: FAIL: gdb.threads/next-fork-other-thread.exp: fork_func=vfork: target-non-stop=on: non-stop=off: displaced-stepping=auto: i=14: next to for loop The test has multiple threads constantly forking or vforking while the main thread keep doing "next"s. (After writing the commit message, I realized this also fixes a similar failure in gdb.threads/forking-threads-plus-breakpoint.exp with the native-gdbserver and native-extended-gdbserver boards.) As stop_all_threads is called, because the main thread finished its "next", it inevitably happens at some point that we ask the remote target to stop a thread and wait() reports that this thread stopped with a fork or vfork event, instead of the SIGSTOP we sent to try to stop it. While running this test, I attached to GDBserver and stopped at linux-low.cc:3626. We can see that the status pulled from the kernel for 2742805 is indeed a vfork event: (gdb) p/x w $3 = 0x2057f (gdb) p WIFSTOPPED(w) $4 = true (gdb) p WSTOPSIG(w) $5 = 5 (gdb) p/x (w >> 8) & (PTRACE_EVENT_VFORK << 8) $6 = 0x200 However, the statement at line 3626 overrides that: ourstatus->set_stopped (gdb_signal_from_host (WSTOPSIG (w))); OURSTATUS becomes "stopped by a SIGTRAP". The information about the fork or vfork is lost. It's then all downhill from there, stop_all_threads eventually asks for a thread list update. That thread list includes the child of that forgotten fork or vfork, the remote target goes "oh cool, a new process, let's attach to it!", when in fact that vfork child's destiny was to be detached. My reverse-engineered understanding of the code around there is that the if/else between lines 3562 and 3583 (in the original code) makes sure OURSTATUS is always initialized (not "ignore"). Either the details are already in event_child->waitstatus (in the case of fork/vfork, for example), in which case we just copy event_child->waitstatus to ourstatus. Or, if the event is a plain "stopped by a signal" or a syscall event, OURSTATUS is set to "stopped", but without a signal number. Lines 3601 to 3629 (in the original code) serve to fill in that last bit of information. The problem is that when `w` holds the vfork status, the code wrongfully takes this branch, because WSTOPSIG(w) returns SIGTRAP: else if (current_thread->last_resume_kind == resume_stop && WSTOPSIG (w) != SIGSTOP) The intent of this branch is, for example, when we sent SIGSTOP to try to stop a thread, but wait() reports that it stopped with another signal (that it must have received from somewhere else simultaneously), say SIGWINCH. In that case, we want to report the SIGWINCH. But in our fork/vfork case, we don't want to take this branch, as the thread didn't really stop because it received a signal. For the non "stopped by a signal" and non "syscall signal" cases, we would ideally skip over all that snippet that fills in the signal or syscall number. The fix I propose is to move this snipppet of the else branch of the if/else above. In addition to moving the code, the last two "else if" branches: else if (current_thread->last_resume_kind == resume_stop && WSTOPSIG (w) != SIGSTOP) { /* A thread that has been requested to stop by GDB with vCont;t, but, it stopped for other reasons. */ ourstatus->set_stopped (gdb_signal_from_host (WSTOPSIG (w))); } else if (ourstatus->kind () == TARGET_WAITKIND_STOPPED) ourstatus->set_stopped (gdb_signal_from_host (WSTOPSIG (w))); are changed into a single else: else ourstatus->set_stopped (gdb_signal_from_host (WSTOPSIG (w))); This is the default path we take if: - W is not a syscall status - W does not represent a SIGSTOP that have sent to stop the thread and therefore want to suppress it Change-Id: If2dc1f0537a549c293f7fa3c53efd00e3e194e79 --- gdbserver/linux-low.cc | 55 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/gdbserver/linux-low.cc b/gdbserver/linux-low.cc index 3c68c57384..b70a3aa2d1 100644 --- a/gdbserver/linux-low.cc +++ b/gdbserver/linux-low.cc @@ -3589,6 +3589,9 @@ linux_process_target::wait_1 (ptid_t ptid, target_waitstatus *ourstatus, unstop_all_lwps (1, event_child); } + /* At this point, we haven't set OURSTATUS. This is where we do it. */ + gdb_assert (ourstatus->kind == TARGET_WAITKIND_IGNORE); + if (event_child->waitstatus.kind != TARGET_WAITKIND_IGNORE) { /* If the reported event is an exit, fork, vfork or exec, let @@ -3607,7 +3610,31 @@ linux_process_target::wait_1 (ptid_t ptid, target_waitstatus *ourstatus, event_child->waitstatus.kind = TARGET_WAITKIND_IGNORE; } else - ourstatus->kind = TARGET_WAITKIND_STOPPED; + { + /* The LWP stopped due to a plain signal or a syscall signal. Either way, + event_chid->waitstatus wasn't filled in with the details, so look at + the wait status W. */ + if (WSTOPSIG (w) == SYSCALL_SIGTRAP) + { + get_syscall_trapinfo (event_child, + &ourstatus->value.syscall_number); + ourstatus->kind = event_child->syscall_state; + } + else if (current_thread->last_resume_kind == resume_stop + && WSTOPSIG (w) == SIGSTOP) + { + /* A thread that has been requested to stop by GDB with vCont;t, + and it stopped cleanly, so report as SIG0. The use of + SIGSTOP is an implementation detail. */ + ourstatus->kind = TARGET_WAITKIND_STOPPED; + ourstatus->value.sig = GDB_SIGNAL_0; + } + else + { + ourstatus->kind = TARGET_WAITKIND_STOPPED; + ourstatus->value.sig = gdb_signal_from_host (WSTOPSIG (w)); + } + } /* Now that we've selected our final event LWP, un-adjust its PC if it was a software breakpoint, and the client doesn't know we can @@ -3625,32 +3652,6 @@ linux_process_target::wait_1 (ptid_t ptid, target_waitstatus *ourstatus, } } - if (WSTOPSIG (w) == SYSCALL_SIGTRAP) - { - get_syscall_trapinfo (event_child, - &ourstatus->value.syscall_number); - ourstatus->kind = event_child->syscall_state; - } - else if (current_thread->last_resume_kind == resume_stop - && WSTOPSIG (w) == SIGSTOP) - { - /* A thread that has been requested to stop by GDB with vCont;t, - and it stopped cleanly, so report as SIG0. The use of - SIGSTOP is an implementation detail. */ - ourstatus->value.sig = GDB_SIGNAL_0; - } - else if (current_thread->last_resume_kind == resume_stop - && WSTOPSIG (w) != SIGSTOP) - { - /* A thread that has been requested to stop by GDB with vCont;t, - but, it stopped for other reasons. */ - ourstatus->value.sig = gdb_signal_from_host (WSTOPSIG (w)); - } - else if (ourstatus->kind == TARGET_WAITKIND_STOPPED) - { - ourstatus->value.sig = gdb_signal_from_host (WSTOPSIG (w)); - } - gdb_assert (step_over_bkpt == null_ptid); if (debug_threads) -- 2.34.1