From 4eec2deb06db4a59966fc0669bf861fd92a4b152 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Wed, 1 Apr 2015 13:38:06 +0100 Subject: [PATCH] Crash on thread id wrap around On GNU/Linux, if the target reuses the TID of a thread that GDB still has in its list marked as THREAD_EXITED, GDB crashes, like: (gdb) continue Continuing. src/gdb/thread.c:789: internal-error: set_running: Assertion `tp->state != THREAD_EXITED' failed. A problem internal to GDB has been detected, further debugging may prove unreliable. Quit this debugging session? (y or n) FAIL: gdb.threads/tid-reuse.exp: continue to breakpoint: after_reuse_time (GDB internal error) Here: (top-gdb) bt #0 internal_error (file=0x953dd8 "src/gdb/thread.c", line=789, fmt=0x953da0 "%s: Assertion `%s' failed.") at src/gdb/common/errors.c:54 #1 0x0000000000638514 in set_running (ptid=..., running=1) at src/gdb/thread.c:789 #2 0x00000000004bda42 in linux_handle_extended_wait (lp=0x16f5760, status=0, stopping=0) at src/gdb/linux-nat.c:2114 #3 0x00000000004bfa24 in linux_nat_filter_event (lwpid=20570, status=198015) at src/gdb/linux-nat.c:3127 #4 0x00000000004c070e in linux_nat_wait_1 (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3478 #5 0x00000000004c1015 in linux_nat_wait (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3722 #6 0x00000000004c92d2 in thread_db_wait (ops=0xd80b60 , ptid=..., ourstatus=0x7fffffffd2c0, options=1) at src/gdb/linux-thread-db.c:1525 #7 0x000000000066db43 in delegate_wait (self=0xd80b60 , arg1=..., arg2=0x7fffffffd2c0, arg3=1) at src/gdb/target-delegates.c:116 #8 0x000000000067e54b in target_wait (ptid=..., status=0x7fffffffd2c0, options=1) at src/gdb/target.c:2206 #9 0x0000000000625111 in fetch_inferior_event (client_data=0x0) at src/gdb/infrun.c:3275 #10 0x0000000000648a3b in inferior_event_handler (event_type=INF_REG_EVENT, client_data=0x0) at src/gdb/inf-loop.c:56 #11 0x00000000004c2ecb in 
handle_target_event (error=0, client_data=0x0) at src/gdb/linux-nat.c:4655 I managed to come up with a test that reliably reproduces this. It spawns enough threads for the pid number space to wrap around, so could potentially take a while. On my box that's 4 seconds; on gcc110, a PPC box which has max_pid set to 65536, it's over 10 seconds. So I made the test compute how long that would take, and cap the time waited if it would be unreasonably long. Tested on x86_64 Fedora 20. gdb/ChangeLog: 2015-04-01 Pedro Alves * linux-thread-db.c (record_thread): Readd the thread to gdb's list if it was marked exited. gdb/testsuite/ChangeLog: 2015-04-01 Pedro Alves * gdb.threads/tid-reuse.c: New file. * gdb.threads/tid-reuse.exp: New file. --- gdb/ChangeLog | 5 + gdb/linux-thread-db.c | 6 +- gdb/testsuite/ChangeLog | 5 + gdb/testsuite/gdb.threads/tid-reuse.c | 151 ++++++++++++++++++++++++ gdb/testsuite/gdb.threads/tid-reuse.exp | 80 +++++++++++++ 5 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 gdb/testsuite/gdb.threads/tid-reuse.c create mode 100644 gdb/testsuite/gdb.threads/tid-reuse.exp diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 88e4a0caac..a08439b4fc 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,8 @@ +2015-04-01 Pedro Alves + + * linux-thread-db.c (record_thread): Readd the thread to gdb's + list if it was marked exited. + 2015-04-01 H.J. Lu * configure: Regenerated. diff --git a/gdb/linux-thread-db.c b/gdb/linux-thread-db.c index 88094a7bd9..886d8ac097 100644 --- a/gdb/linux-thread-db.c +++ b/gdb/linux-thread-db.c @@ -1346,8 +1346,10 @@ record_thread (struct thread_db_info *info, priv->tid = ti_p->ti_tid; update_thread_state (priv, ti_p); - /* Add the thread to GDB's thread list. */ - if (tp == NULL) + /* Add the thread to GDB's thread list. If we already know about a + thread with this PTID, but it's marked exited, then the kernel + reused the tid of an old thread. 
*/ + if (tp == NULL || tp->state == THREAD_EXITED) tp = add_thread_with_info (ptid, priv); else tp->priv = priv; diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index 8a68ec7309..d906a13dc8 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-04-01 Pedro Alves + + * gdb.threads/tid-reuse.c: New file. + * gdb.threads/tid-reuse.exp: New file. + 2015-03-31 Sergio Durigan Junior PR corefiles/16092 diff --git a/gdb/testsuite/gdb.threads/tid-reuse.c b/gdb/testsuite/gdb.threads/tid-reuse.c new file mode 100644 index 0000000000..204c15ab21 --- /dev/null +++ b/gdb/testsuite/gdb.threads/tid-reuse.c @@ -0,0 +1,151 @@ +/* This testcase is part of GDB, the GNU debugger. + + Copyright 2015 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +/* How many threads fit in the target's thread number space. */ +long tid_max = -1; + +/* Number of threads spawned. */ +unsigned long thread_counter; + +/* How long it takes to spawn as many threads as fits in the thread + number space. On systems where thread IDs are just monotonically + incremented, this is enough for the tid numbers to wrap around. On + targets that randomize thread IDs, this is enough time to give each + number in the thread number space some chance of reuse.
It'll be + capped to a lower value if we can't compute it. */ +unsigned int reuse_time = -1; + +void * +do_nothing_thread_func (void *arg) +{ + usleep (1); + return NULL; +} + +void * +spawner_thread_func (void *arg) +{ + while (1) + { + pthread_t child; + int rc; + + thread_counter++; + + rc = pthread_create (&child, NULL, do_nothing_thread_func, NULL); + assert (rc == 0); + + rc = pthread_join (child, NULL); + assert (rc == 0); + } + + return NULL; +} + +/* Called after the program is done counting number of spawned threads + for a period, to compute REUSE_TIME. */ + +void +after_count (void) +{ +} + +/* Called after enough time has passed for TID reuse to occur. */ + +void +after_reuse_time (void) +{ +} + +#ifdef __linux__ + +/* Get the running system's configured pid_max. */ + +static int +linux_proc_get_pid_max (void) +{ + static const char filename[] ="/proc/sys/kernel/pid_max"; + FILE *file; + char buf[100]; + int retval = -1; + + file = fopen (filename, "r"); + if (file == NULL) + { + fprintf (stderr, "unable to open %s\n", filename); + return -1; + } + + if (fgets (buf, sizeof (buf), file) != NULL) + retval = strtol (buf, NULL, 10); + + fclose (file); + return retval; +} + +#endif + +int +main (int argc, char *argv[]) +{ + pthread_t child; + int rc; + unsigned int reuse_time_raw = 0; + + rc = pthread_create (&child, NULL, spawner_thread_func, NULL); + assert (rc == 0); + +#define COUNT_TIME 2 + sleep (COUNT_TIME); + +#ifdef __linux__ + tid_max = linux_proc_get_pid_max (); +#endif + /* If we don't know how many threads it would take to use the whole + number space on this system, just run the test for a bit. */ + if (tid_max > 0) + { + reuse_time_raw = tid_max / ((float) thread_counter / COUNT_TIME) + 0.5; + + /* Give it a bit more, just in case. */ + reuse_time = reuse_time_raw + 3; + } + + /* 4 seconds were sufficient on the machine this was first observed, + an Intel i7-2620M @ 2.70GHz running Linux 3.18.7, with + pid_max=32768. 
Going forward, as machines get faster, this will + need less time, unless pid_max is set to a very high number. To + avoid unreasonably long test time, cap to an upper bound. */ + if (reuse_time > 60) + reuse_time = 60; + printf ("thread_counter=%lu, tid_max = %ld, reuse_time_raw=%u, reuse_time=%u\n", + thread_counter, tid_max, reuse_time_raw, reuse_time); + after_count (); + + sleep (reuse_time); + + after_reuse_time (); + return 0; +} diff --git a/gdb/testsuite/gdb.threads/tid-reuse.exp b/gdb/testsuite/gdb.threads/tid-reuse.exp new file mode 100644 index 0000000000..1f2effcf44 --- /dev/null +++ b/gdb/testsuite/gdb.threads/tid-reuse.exp @@ -0,0 +1,80 @@ +# Copyright 2015 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Test running a program that spawns enough threads that the tid of an +# exited thread is reused. GDB should not crash when this happens. + +standard_testfile + +if {[prepare_for_testing "failed to prepare" $testfile $srcfile { debug pthreads }] == -1} { + return -1 +} + +clean_restart ${binfile} + +if ![runto main] { + fail "Can't run to main" + return -1 +} + +delete_breakpoints + +# Avoid dumping a ton of thread create/exit info in the logs. +gdb_test_no_output "set print thread-events off" + +gdb_breakpoint "after_count" +gdb_continue_to_breakpoint "after_count" + +# Get value of VARIABLE in the inferior.
+ +proc getvar {variable} { + global decimal + global gdb_prompt + + set value 0 + + set msg "get $variable" + gdb_test_multiple "print $variable" $msg { + -re " = ($decimal)\r\n$gdb_prompt $" { + set value $expect_out(1,string) + pass $msg + } + } + return $value +} + +set reuse_time [getvar "reuse_time"] + +# Now the real test. Run to a breakpoint in a thread that exits +# immediately once resumed. The thread ends up left on the thread +# list, marked exited (exactly because it's the selected thread). +gdb_breakpoint "do_nothing_thread_func" +gdb_continue_to_breakpoint "do_nothing_thread_func" + +delete_breakpoints + +# Let the program continue, constantly spawning short-lived threads +# (one at a time). On some targets, after a bit, a new thread reuses +# the tid of the old exited thread that we still have selected. GDB +# should not crash in this situation. Of course, if the tid number +# space is shared between all processes in the system (such as on +# Linux), there's a chance that some other process grabs the TID, but +# that can never cause a spurious test fail. +gdb_breakpoint "after_reuse_time" + +# Higher than what the test program sleeps before exiting. +set timeout [expr $reuse_time * 2] + +gdb_continue_to_breakpoint "after_reuse_time" -- 2.34.1