Fix memory access violations triggered by running strip on fuzzed binaries.
[deliverable/binutils-gdb.git] / binutils / strings.c
1 /* strings -- print the strings of printable characters in files
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18 \f
19 /* Usage: strings [options] file...
20
21 Options:
22 --all
23 -a
24 - Scan each file in its entirety.
25
26 --data
27 -d Scan only the initialized data section(s) of object files.
28
29 --print-file-name
30 -f Print the name of the file before each string.
31
32 --bytes=min-len
33 -n min-len
34 -min-len Print graphic char sequences, MIN-LEN or more bytes long,
35 that are followed by a NUL or a newline. Default is 4.
36
37 --radix={o,x,d}
38 -t {o,x,d} Print the offset within the file before each string,
39 in octal/hex/decimal.
40
41 --include-all-whitespace
42 -w By default tab and space are the only whitespace included in graphic
43 char sequences. This option considers all of isspace() valid.
44
45 -o Like -to. (Some other implementations have -o like -to,
46 others like -td. We chose one arbitrarily.)
47
48 --encoding={s,S,b,l,B,L}
49 -e {s,S,b,l,B,L}
50 Select character encoding: 7-bit-character, 8-bit-character,
51 bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52 littleendian 32-bit.
53
54 --target=BFDNAME
55 -T {bfdname}
56 Specify a non-default object file format.
57
58 --help
59 -h Print the usage message on the standard output.
60
61 --version
62 -V
63 -v Print the program version number.
64
65 Written by Richard Stallman <rms@gnu.ai.mit.edu>
66 and David MacKenzie <djm@gnu.ai.mit.edu>. */
67
68 #include "sysdep.h"
69 #include "bfd.h"
70 #include "getopt.h"
71 #include "libiberty.h"
72 #include "safe-ctype.h"
73 #include "bucomm.h"
74
/* Nonzero when C may be part of a printed string.  C has to lie in the
   range 0..255 and additionally be one of: a tab, an ISPRINT character,
   a value above 127 while the encoding is 'S', or an ISSPACE character
   while include_all_whitespace is set.  C is expanded several times, so
   the argument must not have side effects.  */
#define STRING_ISGRAPHIC(c)                                     \
  ((c) >= 0                                                     \
   && (c) <= 255                                                \
   && ((c) == '\t'                                              \
       || ISPRINT (c)                                           \
       || (encoding == 'S' && (c) > 127)                        \
       || (include_all_whitespace == TRUE && ISSPACE (c))))
81
82 #ifndef errno
83 extern int errno;
84 #endif
85
86 /* The BFD section flags that identify an initialized data section. */
87 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
88
89 /* Radix for printing addresses (must be 8, 10 or 16). */
90 static int address_radix;
91
92 /* Minimum length of sequence of graphic chars to trigger output. */
93 static int string_min;
94
95 /* Whether or not we include all whitespace as a graphic char. */
96 static bfd_boolean include_all_whitespace;
97
98 /* TRUE means print address within file for each string. */
99 static bfd_boolean print_addresses;
100
101 /* TRUE means print filename for each string. */
102 static bfd_boolean print_filenames;
103
104 /* TRUE means for object files scan only the data section. */
105 static bfd_boolean datasection_only;
106
107 /* TRUE if we found an initialized data section in the current file. */
108 static bfd_boolean got_a_section;
109
110 /* The BFD object file format. */
111 static char *target;
112
113 /* The character encoding format. */
114 static char encoding;
115 static int encoding_bytes;
116
/* Long option table handed to getopt_long.  Every entry maps a long
   name onto the short option character that the option switch in main
   handles, so each has_arg value has to agree with the short option
   string "adfhHn:wot:e:T:Vv0123456789" used there.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  /* 'w' has no ':' in the short option string and its handler never
     looks at optarg, so the long spelling must not take an argument
     either; otherwise it would swallow the next command line word.  */
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
131
132 /* Records the size of a named file so that we
133 do not repeatedly run bfd_stat() on it. */
134
135 typedef struct
136 {
137 const char * filename;
138 bfd_size_type filesize;
139 } filename_and_size_t;
140
141 static void strings_a_section (bfd *, asection *, void *);
142 static bfd_boolean strings_object_file (const char *);
143 static bfd_boolean strings_file (char *);
144 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
145 static void usage (FILE *, int);
146 static long get_char (FILE *, file_ptr *, int *, char **);
147 \f
148 int main (int, char **);
149
150 int
151 main (int argc, char **argv)
152 {
153 int optc;
154 int exit_status = 0;
155 bfd_boolean files_given = FALSE;
156 char *s;
157 int numeric_opt = 0;
158
159 #if defined (HAVE_SETLOCALE)
160 setlocale (LC_ALL, "");
161 #endif
162 bindtextdomain (PACKAGE, LOCALEDIR);
163 textdomain (PACKAGE);
164
165 program_name = argv[0];
166 xmalloc_set_program_name (program_name);
167 bfd_set_error_program_name (program_name);
168
169 expandargv (&argc, &argv);
170
171 string_min = 4;
172 include_all_whitespace = FALSE;
173 print_addresses = FALSE;
174 print_filenames = FALSE;
175 if (DEFAULT_STRINGS_ALL)
176 datasection_only = FALSE;
177 else
178 datasection_only = TRUE;
179 target = NULL;
180 encoding = 's';
181
182 while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:Vv0123456789",
183 long_options, (int *) 0)) != EOF)
184 {
185 switch (optc)
186 {
187 case 'a':
188 datasection_only = FALSE;
189 break;
190
191 case 'd':
192 datasection_only = TRUE;
193 break;
194
195 case 'f':
196 print_filenames = TRUE;
197 break;
198
199 case 'H':
200 case 'h':
201 usage (stdout, 0);
202
203 case 'n':
204 string_min = (int) strtoul (optarg, &s, 0);
205 if (s != NULL && *s != 0)
206 fatal (_("invalid integer argument %s"), optarg);
207 break;
208
209 case 'w':
210 include_all_whitespace = TRUE;
211 break;
212
213 case 'o':
214 print_addresses = TRUE;
215 address_radix = 8;
216 break;
217
218 case 't':
219 print_addresses = TRUE;
220 if (optarg[1] != '\0')
221 usage (stderr, 1);
222 switch (optarg[0])
223 {
224 case 'o':
225 address_radix = 8;
226 break;
227
228 case 'd':
229 address_radix = 10;
230 break;
231
232 case 'x':
233 address_radix = 16;
234 break;
235
236 default:
237 usage (stderr, 1);
238 }
239 break;
240
241 case 'T':
242 target = optarg;
243 break;
244
245 case 'e':
246 if (optarg[1] != '\0')
247 usage (stderr, 1);
248 encoding = optarg[0];
249 break;
250
251 case 'V':
252 case 'v':
253 print_version ("strings");
254 break;
255
256 case '?':
257 usage (stderr, 1);
258
259 default:
260 numeric_opt = optind;
261 break;
262 }
263 }
264
265 if (numeric_opt != 0)
266 {
267 string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
268 if (s != NULL && *s != 0)
269 fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
270 }
271 if (string_min < 1)
272 fatal (_("invalid minimum string length %d"), string_min);
273
274 switch (encoding)
275 {
276 case 'S':
277 case 's':
278 encoding_bytes = 1;
279 break;
280 case 'b':
281 case 'l':
282 encoding_bytes = 2;
283 break;
284 case 'B':
285 case 'L':
286 encoding_bytes = 4;
287 break;
288 default:
289 usage (stderr, 1);
290 }
291
292 bfd_init ();
293 set_default_bfd_target ();
294
295 if (optind >= argc)
296 {
297 datasection_only = FALSE;
298 SET_BINARY (fileno (stdin));
299 print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
300 files_given = TRUE;
301 }
302 else
303 {
304 for (; optind < argc; ++optind)
305 {
306 if (strcmp (argv[optind], "-") == 0)
307 datasection_only = FALSE;
308 else
309 {
310 files_given = TRUE;
311 exit_status |= strings_file (argv[optind]) == FALSE;
312 }
313 }
314 }
315
316 if (!files_given)
317 usage (stderr, 1);
318
319 return (exit_status);
320 }
321 \f
322 /* Scan section SECT of the file ABFD, whose printable name is in
323 ARG->filename and whose size might be in ARG->filesize. If it
324 contains initialized data set `got_a_section' and print the
325 strings in it.
326
327 FIXME: We ought to be able to return error codes/messages for
328 certain conditions. */
329
330 static void
331 strings_a_section (bfd *abfd, asection *sect, void *arg)
332 {
333 filename_and_size_t * filename_and_sizep;
334 bfd_size_type *filesizep;
335 bfd_size_type sectsize;
336 void *mem;
337
338 if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
339 return;
340
341 sectsize = bfd_get_section_size (sect);
342
343 if (sectsize <= 0)
344 return;
345
346 /* Get the size of the file. This might have been cached for us. */
347 filename_and_sizep = (filename_and_size_t *) arg;
348 filesizep = & filename_and_sizep->filesize;
349
350 if (*filesizep == 0)
351 {
352 struct stat st;
353
354 if (bfd_stat (abfd, &st))
355 return;
356
357 /* Cache the result so that we do not repeatedly stat this file. */
358 *filesizep = st.st_size;
359 }
360
361 /* Compare the size of the section against the size of the file.
362 If the section is bigger then the file must be corrupt and
363 we should not try dumping it. */
364 if (sectsize >= *filesizep)
365 return;
366
367 mem = xmalloc (sectsize);
368
369 if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
370 {
371 got_a_section = TRUE;
372
373 print_strings (filename_and_sizep->filename, NULL, sect->filepos,
374 0, sectsize, (char *) mem);
375 }
376
377 free (mem);
378 }
379
380 /* Scan all of the sections in FILE, and print the strings
381 in the initialized data section(s).
382
383 Return TRUE if successful,
384 FALSE if not (such as if FILE is not an object file). */
385
386 static bfd_boolean
387 strings_object_file (const char *file)
388 {
389 filename_and_size_t filename_and_size;
390 bfd *abfd;
391
392 abfd = bfd_openr (file, target);
393
394 if (abfd == NULL)
395 /* Treat the file as a non-object file. */
396 return FALSE;
397
398 /* This call is mainly for its side effect of reading in the sections.
399 We follow the traditional behavior of `strings' in that we don't
400 complain if we don't recognize a file to be an object file. */
401 if (!bfd_check_format (abfd, bfd_object))
402 {
403 bfd_close (abfd);
404 return FALSE;
405 }
406
407 got_a_section = FALSE;
408 filename_and_size.filename = file;
409 filename_and_size.filesize = 0;
410 bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
411
412 if (!bfd_close (abfd))
413 {
414 bfd_nonfatal (file);
415 return FALSE;
416 }
417
418 return got_a_section;
419 }
420
421 /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
422
423 static bfd_boolean
424 strings_file (char *file)
425 {
426 struct stat st;
427
428 /* get_file_size does not support non-S_ISREG files. */
429
430 if (stat (file, &st) < 0)
431 {
432 if (errno == ENOENT)
433 non_fatal (_("'%s': No such file"), file);
434 else
435 non_fatal (_("Warning: could not locate '%s'. reason: %s"),
436 file, strerror (errno));
437 return FALSE;
438 }
439
440 /* If we weren't told to scan the whole file,
441 try to open it as an object file and only look at
442 initialized data sections. If that fails, fall back to the
443 whole file. */
444 if (!datasection_only || !strings_object_file (file))
445 {
446 FILE *stream;
447
448 stream = fopen (file, FOPEN_RB);
449 if (stream == NULL)
450 {
451 fprintf (stderr, "%s: ", program_name);
452 perror (file);
453 return FALSE;
454 }
455
456 print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
457
458 if (fclose (stream) == EOF)
459 {
460 fprintf (stderr, "%s: ", program_name);
461 perror (file);
462 return FALSE;
463 }
464 }
465
466 return TRUE;
467 }
468 \f
469 /* Read the next character, return EOF if none available.
470 Assume that STREAM is positioned so that the next byte read
471 is at address ADDRESS in the file.
472
473 If STREAM is NULL, do not read from it.
474 The caller can supply a buffer of characters
475 to be processed before the data in STREAM.
476 MAGIC is the address of the buffer and
477 MAGICCOUNT is how many characters are in it. */
478
479 static long
480 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
481 {
482 int c, i;
483 long r = 0;
484
485 for (i = 0; i < encoding_bytes; i++)
486 {
487 if (*magiccount)
488 {
489 (*magiccount)--;
490 c = *(*magic)++;
491 }
492 else
493 {
494 if (stream == NULL)
495 return EOF;
496
497 /* Only use getc_unlocked if we found a declaration for it.
498 Otherwise, libc is not thread safe by default, and we
499 should not use it. */
500
501 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
502 c = getc_unlocked (stream);
503 #else
504 c = getc (stream);
505 #endif
506 if (c == EOF)
507 return EOF;
508 }
509
510 (*address)++;
511 r = (r << 8) | (c & 0xff);
512 }
513
514 switch (encoding)
515 {
516 default:
517 break;
518 case 'l':
519 r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
520 break;
521 case 'L':
522 r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
523 | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
524 break;
525 }
526
527 return r;
528 }
529 \f
530 /* Find the strings in file FILENAME, read from STREAM.
531 Assume that STREAM is positioned so that the next byte read
532 is at address ADDRESS in the file.
533 Stop reading at address STOP_POINT in the file, if nonzero.
534
535 If STREAM is NULL, do not read from it.
536 The caller can supply a buffer of characters
537 to be processed before the data in STREAM.
538 MAGIC is the address of the buffer and
539 MAGICCOUNT is how many characters are in it.
540 Those characters come at address ADDRESS and the data in STREAM follow. */
541
542 static void
543 print_strings (const char *filename, FILE *stream, file_ptr address,
544 int stop_point, int magiccount, char *magic)
545 {
546 char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
547
548 while (1)
549 {
550 file_ptr start;
551 int i;
552 long c;
553
554 /* See if the next `string_min' chars are all graphic chars. */
555 tryline:
556 if (stop_point && address >= stop_point)
557 break;
558 start = address;
559 for (i = 0; i < string_min; i++)
560 {
561 c = get_char (stream, &address, &magiccount, &magic);
562 if (c == EOF)
563 {
564 free (buf);
565 return;
566 }
567 if (! STRING_ISGRAPHIC (c))
568 /* Found a non-graphic. Try again starting with next char. */
569 goto tryline;
570 buf[i] = c;
571 }
572
573 /* We found a run of `string_min' graphic characters. Print up
574 to the next non-graphic character. */
575
576 if (print_filenames)
577 printf ("%s: ", filename);
578 if (print_addresses)
579 switch (address_radix)
580 {
581 case 8:
582 #ifdef HAVE_LONG_LONG
583 if (sizeof (start) > sizeof (long))
584 {
585 # ifndef __MSVCRT__
586 printf ("%7llo ", (unsigned long long) start);
587 # else
588 printf ("%7I64o ", (unsigned long long) start);
589 # endif
590 }
591 else
592 #elif !BFD_HOST_64BIT_LONG
593 if (start != (unsigned long) start)
594 printf ("++%7lo ", (unsigned long) start);
595 else
596 #endif
597 printf ("%7lo ", (unsigned long) start);
598 break;
599
600 case 10:
601 #ifdef HAVE_LONG_LONG
602 if (sizeof (start) > sizeof (long))
603 {
604 # ifndef __MSVCRT__
605 printf ("%7lld ", (unsigned long long) start);
606 # else
607 printf ("%7I64d ", (unsigned long long) start);
608 # endif
609 }
610 else
611 #elif !BFD_HOST_64BIT_LONG
612 if (start != (unsigned long) start)
613 printf ("++%7ld ", (unsigned long) start);
614 else
615 #endif
616 printf ("%7ld ", (long) start);
617 break;
618
619 case 16:
620 #ifdef HAVE_LONG_LONG
621 if (sizeof (start) > sizeof (long))
622 {
623 # ifndef __MSVCRT__
624 printf ("%7llx ", (unsigned long long) start);
625 # else
626 printf ("%7I64x ", (unsigned long long) start);
627 # endif
628 }
629 else
630 #elif !BFD_HOST_64BIT_LONG
631 if (start != (unsigned long) start)
632 printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
633 (unsigned long) (start & 0xffffffff));
634 else
635 #endif
636 printf ("%7lx ", (unsigned long) start);
637 break;
638 }
639
640 buf[i] = '\0';
641 fputs (buf, stdout);
642
643 while (1)
644 {
645 c = get_char (stream, &address, &magiccount, &magic);
646 if (c == EOF)
647 break;
648 if (! STRING_ISGRAPHIC (c))
649 break;
650 putchar (c);
651 }
652
653 putchar ('\n');
654 }
655 free (buf);
656 }
657 \f
658 static void
659 usage (FILE *stream, int status)
660 {
661 fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
662 fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
663 fprintf (stream, _(" The options are:\n"));
664
665 if (DEFAULT_STRINGS_ALL)
666 fprintf (stream, _("\
667 -a - --all Scan the entire file, not just the data section [default]\n\
668 -d --data Only scan the data sections in the file\n"));
669 else
670 fprintf (stream, _("\
671 -a - --all Scan the entire file, not just the data section\n\
672 -d --data Only scan the data sections in the file [default]\n"));
673
674 fprintf (stream, _("\
675 -f --print-file-name Print the name of the file before each string\n\
676 -n --bytes=[number] Locate & print any NUL-terminated sequence of at\n\
677 -<number> least [number] characters (default 4).\n\
678 -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
679 -w --include-all-whitespace Include all whitespace as valid string characters\n\
680 -o An alias for --radix=o\n\
681 -T --target=<BFDNAME> Specify the binary file format\n\
682 -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
683 s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
684 @<file> Read options from <file>\n\
685 -h --help Display this information\n\
686 -v -V --version Print the program's version number\n"));
687 list_supported_targets (program_name, stream);
688 if (REPORT_BUGS_TO[0] && status == 0)
689 fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
690 exit (status);
691 }
This page took 0.042862 seconds and 5 git commands to generate.