gdb: fix handling of vfork by multi-threaded program (follow-fork-mode=parent, detach...

author Simon Marchi <simon.marchi@efficios.com>

Fri, 14 Jan 2022 20:40:59 +0000 (15:40 -0500)

committer Simon Marchi <simon.marchi@efficios.com>

Tue, 18 Jan 2022 18:22:37 +0000 (13:22 -0500)
author Simon Marchi <simon.marchi@efficios.com>
Fri, 14 Jan 2022 20:40:59 +0000 (15:40 -0500)
committer Simon Marchi <simon.marchi@efficios.com>
Tue, 18 Jan 2022 18:22:37 +0000 (13:22 -0500)
diff --git a/gdb/infrun.c b/gdb/infrun.c

index f182e289170dcbcc0a06fbaba622630af004e6e0..b037ab1b05a9d9b1668d401c84b8ef24579655b4 100644 (file)
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -95,6 +95,11 @@ static void resume (gdb_signal sig);
  
  static void wait_for_inferior (inferior *inf);
  
+static void restart_threads (struct thread_info *event_thread,
+                            inferior *inf = nullptr);
+
+static bool start_step_over (void);
+
  /* Asynchronous signal handler registered as event loop source for
     when we have pending events ready to be passed to the core.  */
  static struct async_event_handler *infrun_async_inferior_event_token;
@@ -429,6 +434,9 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
        return true;
      }
  
+  inferior *parent_inf = current_inferior ();
+  gdb_assert (parent_inf->thread_waiting_for_vfork_done == nullptr);
+
    if (!follow_child)
      {
        /* Detach new forked process?  */
@@ -461,12 +469,11 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
         }
        else
         {
-         struct inferior *parent_inf, *child_inf;
+         inferior *child_inf;
  
           /* Add process to GDB's tables.  */
           child_inf = add_inferior (child_ptid.pid ());
  
-         parent_inf = current_inferior ();
           child_inf->attach_flag = parent_inf->attach_flag;
           copy_terminal_info (child_inf, parent_inf);
           child_inf->gdbarch = parent_inf->gdbarch;
@@ -530,10 +537,6 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
  
        if (has_vforked)
         {
-         struct inferior *parent_inf;
-
-         parent_inf = current_inferior ();
-
           /* If we detached from the child, then we have to be careful
              to not insert breakpoints in the parent until the child
              is done with the shared memory region.  However, if we're
@@ -549,7 +552,7 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
    else
      {
        /* Follow the child.  */
-      struct inferior *parent_inf, *child_inf;
+      inferior *child_inf;
        struct program_space *parent_pspace;
  
        if (print_inferior_events)
@@ -570,7 +573,6 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
  
        child_inf = add_inferior (child_ptid.pid ());
  
-      parent_inf = current_inferior ();
        child_inf->attach_flag = parent_inf->attach_flag;
        copy_terminal_info (child_inf, parent_inf);
        child_inf->gdbarch = parent_inf->gdbarch;
@@ -604,7 +606,6 @@ holding the child stopped.  Try \"set detach-on-fork\" or \
             child_inf->pending_detach = 0;
             parent_inf->vfork_child = child_inf;
             parent_inf->pending_detach = detach_fork;
-           parent_inf->thread_waiting_for_vfork_done = nullptr;
           }
         else if (detach_fork)
           {
@@ -770,6 +771,12 @@ follow_fork ()
         parent = inferior_ptid;
         child = tp->pending_follow.value.related_pid;
  
+       /* If handling a vfork, stop all the inferior's threads, they will be
+          restarted when the vfork shared region is complete.  */
+       if (tp->pending_follow.kind == TARGET_WAITKIND_VFORKED
+           && target_is_non_stop_p ())
+         stop_all_threads ("handling vfork", tp->inf);
+
         process_stratum_target *parent_targ = tp->inf->process_target ();
         /* Set up inferior(s) as specified by the caller, and tell the
            target to do whatever is necessary to follow either parent
@@ -1046,6 +1053,53 @@ handle_vfork_child_exec_or_exit (int exec)
      }
  }
  
+/* Handle TARGET_WAITKIND_VFORK_DONE.  */
+
+static void
+handle_vfork_done (thread_info *event_thread)
+{
+  /* We only care about this event if inferior::thread_waiting_for_vfork_done is
+     set, that is if we are waiting for a vfork child not under our control
+     (because we detached it) to exec or exit.
+
+     If an inferior has vforked and we are debugging the child, we don't use
+     the vfork-done event to get notified about the end of the shared address
+     space window).  We rely instead on the child's exec or exit event, and the
+     inferior::vfork_{parent,child} fields are used instead.  See
+     handle_vfork_child_exec_or_exit for that.  */
+  if (event_thread->inf->thread_waiting_for_vfork_done == nullptr)
+    {
+      infrun_debug_printf ("not waiting for a vfork-done event");
+      return;
+    }
+
+  INFRUN_SCOPED_DEBUG_ENTER_EXIT;
+
+  /* We stopped all threads (other than the vforking thread) of the inferior in
+     follow_fork and kept them stopped until now.  It should therefore not be
+     possible for another thread to have reported a vfork during that window.
+     If THREAD_WAITING_FOR_VFORK_DONE is set, it has to be the same thread whose
+     vfork-done we are handling right now.  */
+  gdb_assert (event_thread->inf->thread_waiting_for_vfork_done == event_thread);
+
+  event_thread->inf->thread_waiting_for_vfork_done = nullptr;
+  event_thread->inf->pspace->breakpoints_not_allowed = 0;
+
+  /* On non-stop targets, we stopped all the inferior's threads in follow_fork,
+     resume them now.  On all-stop targets, everything that needs to be resumed
+     will be when we resume the event thread.  */
+  if (target_is_non_stop_p ())
+    {
+      /* restart_threads and start_step_over may change the current thread, make
+        sure we leave the event thread as the current thread.  */
+      scoped_restore_current_thread restore_thread;
+
+      insert_breakpoints ();
+      restart_threads (event_thread, event_thread->inf);
+      start_step_over ();
+    }
+}
+
  /* Enum strings for "set|show follow-exec-mode".  */
  
  static const char follow_exec_mode_new[] = "new";
@@ -1901,6 +1955,16 @@ start_step_over (void)
           continue;
         }
  
+      if (tp->inf->thread_waiting_for_vfork_done)
+       {
+         /* When we stop all threads, handling a vfork, any thread in the step
+            over chain remains there.  A user could also try to continue a
+            thread stopped at a breakpoint while another thread is waiting for
+            a vfork-done event.  In any case, we don't want to start a step
+            over right now.  */
+         continue;
+       }
+
        /* Remove thread from the THREADS_TO_STEP chain.  If anything goes wrong
          while we try to prepare the displaced step, we don't add it back to
          the global step over chain.  This is to avoid a thread staying in the
@@ -2137,8 +2201,41 @@ internal_resume_ptid (int user_step)
       return a wildcard ptid.  */
    if (target_is_non_stop_p ())
      return inferior_ptid;
-  else
-    return user_visible_resume_ptid (user_step);
+
+  /* The rest of the function assumes non-stop==off and
+     target-non-stop==off.
+
+     If a thread is waiting for a vfork-done event, it means breakpoints are out
+     for this inferior (well, program space in fact).  We don't want to resume
+     any thread other than the one waiting for vfork done, otherwise these other
+     threads could miss breakpoints.  So if a thread in the resumption set is
+     waiting for a vfork-done event, resume only that thread.
+
+     The resumption set width depends on whether schedule-multiple is on or off.
+
+     Note that if the target_resume interface was more flexible, we could be
+     smarter here when schedule-multiple is on.  For example, imagine 3
+     inferiors with 2 threads each (1.1, 1.2, 2.1, 2.2, 3.1 and 3.2).  Threads
+     2.1 and 3.2 are both waiting for a vfork-done event.  Then we could ask the
+     target(s) to resume:
+
+      - All threads of inferior 1
+      - Thread 2.1
+      - Thread 3.2
+
+     Since we don't have that flexibility (we can only pass one ptid), just
+     resume the first thread waiting for a vfork-done event we find (e.g. thread
+     2.1).  */
+  if (sched_multi)
+    {
+      for (inferior *inf : all_non_exited_inferiors ())
+       if (inf->thread_waiting_for_vfork_done != nullptr)
+         return inf->thread_waiting_for_vfork_done->ptid;
+    }
+  else if (current_inferior ()->thread_waiting_for_vfork_done != nullptr)
+    return current_inferior ()->thread_waiting_for_vfork_done->ptid;
+
+  return user_visible_resume_ptid (user_step);
  }
  
  /* Wrapper for target_resume, that handles infrun-specific
@@ -3261,6 +3358,19 @@ proceed (CORE_ADDR addr, enum gdb_signal siggnal)
                 continue;
               }
  
+           /* If a thread of that inferior is waiting for a vfork-done
+              (for a detached vfork child to exec or exit), breakpoints are
+              removed.  We must not resume any thread of that inferior, other
+              than the one waiting for the vfork-done.  */
+           if (tp->inf->thread_waiting_for_vfork_done != nullptr
+               && tp != tp->inf->thread_waiting_for_vfork_done)
+             {
+               infrun_debug_printf ("[%s] another thread of this inferior is "
+                                    "waiting for vfork-done",
+                                    tp->ptid.to_string ().c_str ());
+               continue;
+             }
+
             infrun_debug_printf ("resuming %s",
                                  target_pid_to_str (tp->ptid).c_str ());
  
@@ -3271,7 +3381,12 @@ proceed (CORE_ADDR addr, enum gdb_signal siggnal)
               error (_("Command aborted."));
           }
        }
-    else if (!cur_thr->resumed && !thread_is_in_step_over_chain (cur_thr))
+    else if (!cur_thr->resumed
+            && !thread_is_in_step_over_chain (cur_thr)
+            /* In non-stop, forbid resume a thread if some other thread of
+               that inferior is waiting for a vfork-done event (this means
+               breakpoints are out for this inferior).  */
+            && !(non_stop && cur_thr->inf->thread_waiting_for_vfork_done))
        {
         /* The thread wasn't started, and isn't queued, run it now.  */
         reset_ecs (ecs, cur_thr);
@@ -3771,8 +3886,6 @@ struct wait_one_event
  };
  
  static bool handle_one (const wait_one_event &event);
-static void restart_threads (struct thread_info *event_thread,
-                            inferior *inf = nullptr);
  
  /* Prepare and stabilize the inferior for detaching it.  E.g.,
     detaching while a thread is displaced stepping is a recipe for
@@ -5683,8 +5796,8 @@ handle_inferior_event (struct execution_control_state *ecs)
  
        context_switch (ecs);
  
-      current_inferior ()->thread_waiting_for_vfork_done = nullptr;
-      current_inferior ()->pspace->breakpoints_not_allowed = 0;
+      handle_vfork_done (ecs->event_thread);
+      gdb_assert (inferior_thread () == ecs->event_thread);
  
        if (handle_stop_requested (ecs))
         return;
diff --git a/gdb/testsuite/gdb.threads/vfork-multi-thread.c b/gdb/testsuite/gdb.threads/vfork-multi-thread.c

new file mode 100644 (file)

index 0000000..cd3dfcc
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/vfork-multi-thread.c
@@ -0,0 +1,74 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+// Start with:
+//
+//   ./gdb -nx -q --data-directory=data-directory repro -ex "tb break_here_first" -ex r -ex "b should_break_here"
+//
+// Then do "continue".
+//
+// The main thread will likely cross should_break_here while we are handling
+// the vfork and breakpoints are removed, therefore missing the breakpoint.
+
+static volatile int release_vfork = 0;
+static volatile int release_main = 0;
+
+static void *vforker(void *arg)
+{
+  while (!release_vfork);
+
+  pid_t pid = vfork ();
+  if (pid == 0) {
+    /* A vfork child is not supposed to mess with the state of the program,
+       but it is helpful for the purpose of this test.  */
+    release_main = 1;
+    _exit(7);
+  }
+
+  int stat;
+  int ret = waitpid (pid, &stat, 0);
+  assert (ret == pid);
+  assert (WIFEXITED (stat));
+  assert (WEXITSTATUS (stat) == 7);
+
+  return NULL;
+}
+
+static void should_break_here(void) {}
+
+int main()
+{
+
+  pthread_t thread;
+  int ret = pthread_create(&thread, NULL, vforker, NULL);
+  assert(ret == 0);
+
+  /* We break here first, while the thread is stuck on `!release_fork`.  */
+  release_vfork = 1;
+
+  /* We set a breakpoint on should_break_here.
+
+     We then set "release_fork" from the debugger and continue.  The main
+     thread hangs on `!release_main` while the non-main thread vforks.  During
+     the window of time where the two processes have a shared address space
+     (after vfork, before _exit), GDB removes the breakpoints from the address
+     space.  During that window, only the vfork-ing thread (the non-main
+     thread) is frozen by the kernel.  The main thread is free to execute.  The
+     child process sets `release_main`, releasing the main thread. A buggy GDB
+     would let the main thread execute during that window, leading to the
+     breakpoint on should_break_here being missed.  A fixed GDB does not resume
+     the threads of the vforking process other than the vforking thread.  When
+     the vfork child exits, the fixed GDB resumes the main thread, after
+     breakpoints are reinserted, so the breakpoint is not missed.  */
+
+  while (!release_main);
+
+  should_break_here();
+
+  pthread_join (thread, NULL);
+
+  return 6;
+}
diff --git a/gdb/testsuite/gdb.threads/vfork-multi-thread.exp b/gdb/testsuite/gdb.threads/vfork-multi-thread.exp

new file mode 100644 (file)

index 0000000..d405411
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/vfork-multi-thread.exp
@@ -0,0 +1,96 @@
+# Copyright 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test that a multi-threaded program doing a vfork doesn't miss breakpoints.
+#
+# When a program vforks, its address space is shared with the parent.  When we
+# detach a vfork child, we must keep breakpoints out of that shared address space
+# until the child either exits or execs, so that the child does not hit a
+# breakpoint while out of GDB's control.  During that time, threads from
+# the parent must be held stopped, otherwise they could miss breakpoints.
+#
+# The thread that did the vfork is suspended by the kernel, so it's not a
+# concern.  The other threads need to be manually stopped by GDB and resumed
+# once the vfork critical region is done.
+#
+# This test spawns one thread that calls vfork.  Meanwhile, the main thread
+# crosses a breakpoint.  A buggy GDB would let the main thread run while
+# breakpoints are removed, so the main thread would miss the breakpoint and run
+# until exit.
+
+standard_testfile
+
+if { [build_executable "failed to prepare" ${testfile} ${srcfile} {debug pthreads}] } {
+    return
+}
+
+set any "\[^\r\n\]*"
+
+# A bunch of util procedures to continue an inferior to an expected point.
+
+proc continue_to_parent_breakpoint {} {
+    gdb_test "continue" \
+       "hit Breakpoint .* should_break_here .*" \
+       "continue parent to breakpoint"
+}
+
+proc continue_to_parent_end {} {
+    gdb_test "continue" "Inferior 1.*exited with code 06.*" \
+       "continue parent to end"
+}
+
+# Run the test with the given GDB settings.
+
+proc do_test { target-non-stop non-stop follow-fork-mode detach-on-fork schedule-multiple } {
+    save_vars { ::GDBFLAGS } {
+       append ::GDBFLAGS " -ex \"maintenance set target-non-stop ${target-non-stop}\""
+       append ::GDBFLAGS " -ex \"set non-stop ${non-stop}\""
+       clean_restart ${::binfile}
+    }
+
+    gdb_test_no_output "set follow-fork-mode ${follow-fork-mode}"
+    gdb_test_no_output "set detach-on-fork ${detach-on-fork}"
+    gdb_test_no_output "set schedule-multiple ${schedule-multiple}"
+
+    # The message about thread 2 of inferior 1 exiting happens at a somewhat
+    # unpredictable moment, it's simpler to silence it than to try to match it.
+    gdb_test_no_output "set print thread-events off"
+
+    if { ![runto_main] } {
+       return
+    }
+
+    # The main thread is expected to hit this breakpoint.
+    gdb_test "break should_break_here" "Breakpoint $::decimal at .*"
+
+    continue_to_parent_breakpoint
+    continue_to_parent_end
+}
+
+# We only test with follow-fork-mode=parent and detach-on-fork=on at the
+# moment, but the loops below are written to make it easy to add other values
+# on these axes in the future.
+
+foreach_with_prefix target-non-stop {auto on off} {
+    foreach_with_prefix non-stop {off on} {
+       foreach_with_prefix follow-fork-mode {parent} {
+           foreach_with_prefix detach-on-fork {on} {
+               foreach_with_prefix schedule-multiple {off on} {
+                   do_test ${target-non-stop} ${non-stop} ${follow-fork-mode} ${detach-on-fork} ${schedule-multiple}
+               }
+           }
+       }
+    }
+}
author	Simon Marchi <simon.marchi@efficios.com>
	Fri, 14 Jan 2022 20:40:59 +0000 (15:40 -0500)
committer	Simon Marchi <simon.marchi@efficios.com>
	Tue, 18 Jan 2022 18:22:37 +0000 (13:22 -0500)
gdb/infrun.c		patch \| blob \| blame \| history
gdb/testsuite/gdb.threads/vfork-multi-thread.c	[new file with mode: 0644]	patch \| blob
gdb/testsuite/gdb.threads/vfork-multi-thread.exp	[new file with mode: 0644]	patch \| blob