PR gas/5322
[deliverable/binutils-gdb.git] / binutils / rclex.c
CommitLineData
4a594fce
NC
1/* rclex.c -- lexer for Windows rc files parser */
2
3/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
4 Free Software Foundation, Inc.
5
6 Written by Kai Tietz, Onevision.
7
8 This file is part of GNU Binutils.
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
32866df7 12 the Free Software Foundation; either version 3 of the License, or
4a594fce
NC
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 02110-1301, USA. */
24
32866df7 25
4a594fce
NC
26/* This is a lexer used by the Windows rc file parser. It basically
27 just recognizes a bunch of keywords. */
28
29#include "sysdep.h"
30#include "bfd.h"
31#include "bucomm.h"
32#include "libiberty.h"
33#include "safe-ctype.h"
34#include "windres.h"
35#include "rcparse.h"
36
37#include <assert.h>
38
/* Nonzero while the parser has asked for rcdata mode.  In this mode
   yylex hands back sized strings, i.e. strings that carry their
   length alongside the text.  */

static int rcdata_mode = 0;

/* Nonzero while we are suppressing lines that come from cpp
   (including windows.h or headers from your C sources may bring in
   externs and typedefs).  While this is set we return IGNORED_TOKEN,
   which lets us ignore such text outside of resource constructs.
   Thus, it isn't required to protect all the non-preprocessor lines
   in your header files with #ifdef RC_INVOKED.  It also means your
   RC file can't include other RC files if they're named "*.h".
   Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data = 0;

/* Yield X normally, or IGNORED_TOKEN while cpp data is suppressed.  */

#define IGNORE_CPP(x) (! suppress_cpp_data ? (x) : IGNORED_TOKEN)

/* The first filename we detect in the cpp output.  We use this to
   tell included files from the original file.  */

static char *initial_fn = NULL;
60
/* One node of the singly linked list of strings handed out by the
   lexer.  Each node records one allocated buffer.  */

struct alloc_string
{
  /* Next node in the list, or NULL at the end.  */
  struct alloc_string *next;
  /* The allocated buffer itself.  */
  char *s;
};

/* Head of the list of allocated strings; NULL when the list is
   empty.  */

static struct alloc_string *strings = NULL;
70
71struct rclex_keywords
72{
73 const char *name;
74 int tok;
75};
76
77#define K(KEY) { #KEY, KEY }
78#define KRT(KEY) { #KEY, RT_##KEY }
79
80static const struct rclex_keywords keywds[] =
81{
82 K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
83 K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
84 K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
85 K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
86 K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
87 K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
88 K(DLGINCLUDE), K(DLGINIT),
89 K(EDITTEXT), K(END), K(EXSTYLE),
90 K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
91 K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
92 K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
93 K(HEDIT), K(HELP), K(HTML),
94 K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
95 K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
96 K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
97 K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
98 K(NOINVERT), K(NOT),
99 K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
100 K(PURE), K(PUSHBOX), K(PUSHBUTTON),
101 K(RADIOBUTTON), K(RCDATA), K(RTEXT),
102 K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
103 K(STRINGTABLE), K(STYLE),
104 K(TOOLBAR),
105 K(USERBUTTON),
106 K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
107 K(VIRTKEY), K(VXD),
108 { NULL, 0 },
109};
110
/* External input stream from resrc.  All characters are read from
   here.  */
extern FILE *cpp_pipe;

/* Lexical scanner helpers.  */

/* One character of push-back used for peeking, or -1 when there is
   none.  */
static int rclex_lastch = -1;

/* Capacity recorded for the token buffer.  */
static size_t rclex_tok_max = 0;

/* Number of characters currently stored in the token buffer.  */
static size_t rclex_tok_pos = 0;

/* Text of the token being scanned, kept NUL terminated; NULL until
   the first use.  */
static char *rclex_tok = NULL;
119
120static int
121rclex_translatekeyword (const char *key)
122{
123 if (key && ISUPPER (key[0]))
124 {
125 const struct rclex_keywords *kw = &keywds[0];
126
127 do
128 {
129 if (! strcmp (kw->name, key))
130 return kw->tok;
131 ++kw;
132 }
133 while (kw->name != NULL);
134 }
135 return STRING;
136}
137
138/* Handle a C preprocessor line. */
139
140static void
141cpp_line (void)
142{
143 const char *s = rclex_tok;
144 int line;
145 char *send, *fn;
d856f2dd 146 size_t len, mlen;
4a594fce
NC
147
148 ++s;
149 while (ISSPACE (*s))
150 ++s;
151
d856f2dd
NC
152 /* Check for #pragma code_page ( DEFAULT | <nr>). */
153 len = strlen (s);
154 mlen = strlen ("pragma");
155 if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
156 {
157 const char *end;
158
159 s += mlen + 1;
160 while (ISSPACE (*s))
161 ++s;
162 len = strlen (s);
163 mlen = strlen ("code_page");
164 if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
165 /* FIXME: We ought to issue a warning message about an unrecognised pragma. */
166 return;
167 s += mlen;
168 while (ISSPACE (*s))
169 ++s;
170 if (*s != '(')
171 /* FIXME: We ought to issue an error message about a malformed pragma. */
172 return;
173 ++s;
174 while (ISSPACE (*s))
175 ++s;
176 if (*s == 0 || (end = strchr (s, ')')) == NULL)
177 /* FIXME: We ought to issue an error message about a malformed pragma. */
178 return;
179 len = (size_t) (end - s);
180 fn = xmalloc (len + 1);
181 if (len)
182 memcpy (fn, s, len);
183 fn[len] = 0;
184 while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
185 fn[--len] = 0;
186 if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
187 wind_current_codepage = wind_default_codepage;
188 else if (len > 0)
189 {
190 rc_uint_type ncp;
191
192 if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
193 ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
194 else
195 ncp = (rc_uint_type) strtol (fn, NULL, 10);
196 if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
197 fatal (_("invalid value specified for pragma code_page.\n"));
198 wind_current_codepage = ncp;
199 }
200 free (fn);
201 return;
202 }
203
4a594fce
NC
204 line = strtol (s, &send, 0);
205 if (*send != '\0' && ! ISSPACE (*send))
206 return;
207
208 /* Subtract 1 because we are about to count the newline. */
209 rc_lineno = line - 1;
210
211 s = send;
212 while (ISSPACE (*s))
213 ++s;
214
215 if (*s != '"')
216 return;
217
218 ++s;
219 send = strchr (s, '"');
220 if (send == NULL)
221 return;
222
223 fn = xmalloc (send - s + 1);
224 strncpy (fn, s, send - s);
225 fn[send - s] = '\0';
226
227 free (rc_filename);
228 rc_filename = fn;
229
230 if (! initial_fn)
231 {
232 initial_fn = xmalloc (strlen (fn) + 1);
233 strcpy (initial_fn, fn);
234 }
235
236 /* Allow the initial file, regardless of name. Suppress all other
237 files if they end in ".h" (this allows included "*.rc"). */
238 if (strcmp (initial_fn, fn) == 0
239 || strcmp (fn + strlen (fn) - 2, ".h") != 0)
240 suppress_cpp_data = 0;
241 else
242 suppress_cpp_data = 1;
243}
244
245/* Allocate a string of a given length. */
246
247static char *
248get_string (int len)
249{
250 struct alloc_string *as;
251
252 as = xmalloc (sizeof *as);
253 as->s = xmalloc (len);
254
255 as->next = strings;
256 strings = as;
257
258 return as->s;
259}
260
261/* Handle a quoted string. The quotes are stripped. A pair of quotes
262 in a string are turned into a single quote. Adjacent strings are
263 merged separated by whitespace are merged, as in C. */
264
265static char *
266handle_quotes (rc_uint_type *len)
267{
268 const char *input = rclex_tok;
269 char *ret, *s;
270 const char *t;
271 int ch;
272 int num_xdigits;
273
274 ret = get_string (strlen (input) + 1);
275
276 s = ret;
277 t = input;
278 if (*t == '"')
279 ++t;
280 while (*t != '\0')
281 {
282 if (*t == '\\')
283 {
284 ++t;
285 switch (*t)
286 {
287 case '\0':
288 rcparse_warning ("backslash at end of string");
289 break;
290
291 case '\"':
292 rcparse_warning ("use \"\" to put \" in a string");
293 *s++ = '"';
294 ++t;
295 break;
296
297 case 'a':
298 *s++ = ESCAPE_B; /* Strange, but true... */
299 ++t;
300 break;
301
302 case 'b':
303 *s++ = ESCAPE_B;
304 ++t;
305 break;
306
307 case 'f':
308 *s++ = ESCAPE_F;
309 ++t;
310 break;
311
312 case 'n':
313 *s++ = ESCAPE_N;
314 ++t;
315 break;
316
317 case 'r':
318 *s++ = ESCAPE_R;
319 ++t;
320 break;
321
322 case 't':
323 *s++ = ESCAPE_T;
324 ++t;
325 break;
326
327 case 'v':
328 *s++ = ESCAPE_V;
329 ++t;
330 break;
331
332 case '\\':
333 *s++ = *t++;
334 break;
335
336 case '0': case '1': case '2': case '3':
337 case '4': case '5': case '6': case '7':
338 ch = *t - '0';
339 ++t;
340 if (*t >= '0' && *t <= '7')
341 {
342 ch = (ch << 3) | (*t - '0');
343 ++t;
344 if (*t >= '0' && *t <= '7')
345 {
346 ch = (ch << 3) | (*t - '0');
347 ++t;
348 }
349 }
350 *s++ = ch;
351 break;
352
353 case 'x': case 'X':
354 ++t;
355 ch = 0;
356 /* We only handle single byte chars here. Make sure
357 we finish an escape sequence like "/xB0ABC" after
358 the first two digits. */
359 num_xdigits = 2;
360 while (num_xdigits--)
361 {
362 if (*t >= '0' && *t <= '9')
363 ch = (ch << 4) | (*t - '0');
364 else if (*t >= 'a' && *t <= 'f')
365 ch = (ch << 4) | (*t - 'a' + 10);
366 else if (*t >= 'A' && *t <= 'F')
367 ch = (ch << 4) | (*t - 'A' + 10);
368 else
369 break;
370 ++t;
371 }
372 *s++ = ch;
373 break;
374
375 default:
376 rcparse_warning ("unrecognized escape sequence");
377 *s++ = '\\';
378 *s++ = *t++;
379 break;
380 }
381 }
382 else if (*t != '"')
383 *s++ = *t++;
384 else if (t[1] == '\0')
385 break;
386 else if (t[1] == '"')
387 {
388 *s++ = '"';
389 t += 2;
390 }
391 else
392 {
393 rcparse_warning ("unexpected character after '\"'");
394 ++t;
395 assert (ISSPACE (*t));
396 while (ISSPACE (*t))
397 {
398 if ((*t) == '\n')
399 ++rc_lineno;
400 ++t;
401 }
402 if (*t == '\0')
403 break;
404 assert (*t == '"');
405 ++t;
406 }
407 }
408
409 *s = '\0';
410
411 *len = s - ret;
412
413 return ret;
414}
415
416/* Allocate a unicode string of a given length. */
417
418static unichar *
419get_unistring (int len)
420{
421 return (unichar *) get_string (len * sizeof (unichar));
422}
423
424/* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
425 in a string are turned into a single quote. Adjacent strings are
426 merged separated by whitespace are merged, as in C. */
427
428static unichar *
429handle_uniquotes (rc_uint_type *len)
430{
431 const char *input = rclex_tok;
432 unichar *ret, *s;
433 const char *t;
434 int ch;
435 int num_xdigits;
436
437 ret = get_unistring (strlen (input) + 1);
438
439 s = ret;
440 t = input;
441 if ((*t == 'L' || *t == 'l') && t[1] == '"')
442 t += 2;
443 else if (*t == '"')
444 ++t;
445 while (*t != '\0')
446 {
447 if (*t == '\\')
448 {
449 ++t;
450 switch (*t)
451 {
452 case '\0':
453 rcparse_warning ("backslash at end of string");
454 break;
455
456 case '\"':
457 rcparse_warning ("use \"\" to put \" in a string");
458 break;
459
460 case 'a':
461 *s++ = ESCAPE_B; /* Strange, but true... */
462 ++t;
463 break;
464
465 case 'b':
466 *s++ = ESCAPE_B;
467 ++t;
468 break;
469
470 case 'f':
471 *s++ = ESCAPE_F;
472 ++t;
473 break;
474
475 case 'n':
476 *s++ = ESCAPE_N;
477 ++t;
478 break;
479
480 case 'r':
481 *s++ = ESCAPE_R;
482 ++t;
483 break;
484
485 case 't':
486 *s++ = ESCAPE_T;
487 ++t;
488 break;
489
490 case 'v':
491 *s++ = ESCAPE_V;
492 ++t;
493 break;
494
495 case '\\':
496 *s++ = (unichar) *t++;
497 break;
498
499 case '0': case '1': case '2': case '3':
500 case '4': case '5': case '6': case '7':
501 ch = *t - '0';
502 ++t;
503 if (*t >= '0' && *t <= '7')
504 {
505 ch = (ch << 3) | (*t - '0');
506 ++t;
507 if (*t >= '0' && *t <= '7')
508 {
509 ch = (ch << 3) | (*t - '0');
510 ++t;
511 }
512 }
513 *s++ = (unichar) ch;
514 break;
515
516 case 'x': case 'X':
517 ++t;
518 ch = 0;
519 /* We only handle two byte chars here. Make sure
520 we finish an escape sequence like "/xB0ABC" after
521 the first two digits. */
522 num_xdigits = 4;
523 while (num_xdigits--)
524 {
525 if (*t >= '0' && *t <= '9')
526 ch = (ch << 4) | (*t - '0');
527 else if (*t >= 'a' && *t <= 'f')
528 ch = (ch << 4) | (*t - 'a' + 10);
529 else if (*t >= 'A' && *t <= 'F')
530 ch = (ch << 4) | (*t - 'A' + 10);
531 else
532 break;
533 ++t;
534 }
535 *s++ = (unichar) ch;
536 break;
537
538 default:
539 rcparse_warning ("unrecognized escape sequence");
540 *s++ = '\\';
541 *s++ = (unichar) *t++;
542 break;
543 }
544 }
545 else if (*t != '"')
546 *s++ = (unichar) *t++;
547 else if (t[1] == '\0')
548 break;
549 else if (t[1] == '"')
550 {
551 *s++ = '"';
552 t += 2;
553 }
554 else
555 {
556 ++t;
557 assert (ISSPACE (*t));
558 while (ISSPACE (*t))
559 {
560 if ((*t) == '\n')
561 ++rc_lineno;
562 ++t;
563 }
564 if (*t == '\0')
565 break;
566 assert (*t == '"');
567 ++t;
568 }
569 }
570
571 *s = '\0';
572
573 *len = s - ret;
574
575 return ret;
576}
577
578/* Discard all the strings we have allocated. The parser calls this
579 when it no longer needs them. */
580
581void
582rcparse_discard_strings (void)
583{
584 struct alloc_string *as;
585
586 as = strings;
587 while (as != NULL)
588 {
589 struct alloc_string *n;
590
591 free (as->s);
592 n = as->next;
593 free (as);
594 as = n;
595 }
596
597 strings = NULL;
598}
599
600/* Enter rcdata mode. */
601void
602rcparse_rcdata (void)
603{
604 rcdata_mode = 1;
605}
606
607/* Go back to normal mode from rcdata mode. */
608void
609rcparse_normal (void)
610{
611 rcdata_mode = 0;
612}
613
614static void
615rclex_tok_add_char (int ch)
616{
617 if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
618 {
619 char *h = xmalloc (rclex_tok_max + 9);
620
621 if (! h)
622 abort ();
623 if (rclex_tok)
624 {
625 memcpy (h, rclex_tok, rclex_tok_pos + 1);
626 free (rclex_tok);
627 }
628 else
629 rclex_tok_pos = 0;
630 rclex_tok_max += 8;
631 rclex_tok = h;
632 }
633 if (ch != -1)
634 rclex_tok[rclex_tok_pos++] = (char) ch;
635 rclex_tok[rclex_tok_pos] = 0;
636}
637
638static int
639rclex_readch (void)
640{
641 int r = -1;
642
643 if ((r = rclex_lastch) != -1)
644 rclex_lastch = -1;
645 else
646 {
647 char ch;
648 do
649 {
650 if (! cpp_pipe || feof (cpp_pipe)
651 || fread (&ch, 1, 1,cpp_pipe) != 1)
652 break;
653 r = ((int) ch) & 0xff;
654 }
655 while (r == 0 || r == '\r');
656 }
657 rclex_tok_add_char (r);
658 return r;
659}
660
661static int
662rclex_peekch (void)
663{
664 int r;
665
666 if ((r = rclex_lastch) == -1)
667 {
668 if ((r = rclex_readch ()) != -1)
669 {
670 rclex_lastch = r;
671 if (rclex_tok_pos > 0)
672 rclex_tok[--rclex_tok_pos] = 0;
673 }
674 }
675 return r;
676}
677
/* Read the remainder of a quoted string into the token; the caller
   has already consumed the opening quote.  Reading stops after the
   closing quote, or in front of a newline or the end of input.  A
   backslash takes the following character with it, and a doubled
   quote does not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int c = rclex_peekch ();

      if (c == -1 || c == '\n')
        return;

      /* Consume the character we just looked at.  */
      rclex_readch ();

      if (c == '\\')
        {
          /* Take the escaped character too, unless the line or the
             input ends here.  */
          c = rclex_peekch ();
          if (c == -1 || c == '\n')
            return;
          rclex_readch ();
        }
      else if (c == '"')
        {
          /* Only a doubled quote lets the string continue.  */
          if (rclex_peekch () != '"')
            return;
          rclex_readch ();
        }
    }
}
703
704static rc_uint_type
705read_digit (int ch)
706{
707 rc_uint_type base = 10;
708 rc_uint_type ret, val;
709 int warned = 0;
710
711 ret = 0;
712 if (ch == '0')
713 {
714 base = 8;
715 switch (rclex_peekch ())
716 {
717 case 'o': case 'O':
718 rclex_readch ();
719 base = 8;
720 break;
721
722 case 'x': case 'X':
723 rclex_readch ();
724 base = 16;
725 break;
726 }
727 }
728 else
729 ret = (rc_uint_type) (ch - '0');
730 while ((ch = rclex_peekch ()) != -1)
731 {
732 if (ISDIGIT (ch))
733 val = (rc_uint_type) (ch - '0');
734 else if (ch >= 'a' && ch <= 'f')
735 val = (rc_uint_type) ((ch - 'a') + 10);
736 else if (ch >= 'A' && ch <= 'F')
737 val = (rc_uint_type) ((ch - 'A') + 10);
738 else
739 break;
740 rclex_readch ();
741 if (! warned && val >= base)
742 {
743 warned = 1;
744 rcparse_warning ("digit exceeds base");
745 }
746 ret *= base;
747 ret += val;
748 }
749 return ret;
750}
751
752/* yyparser entry method. */
753
754int
755yylex (void)
756{
757 char *s;
758 unichar *us;
759 rc_uint_type length;
760 int ch;
761
762 /* Make sure that rclex_tok is initialized. */
763 if (! rclex_tok)
764 rclex_tok_add_char (-1);
765
766 do
767 {
768 do
769 {
770 /* Clear token. */
771 rclex_tok_pos = 0;
772 rclex_tok[0] = 0;
773
774 if ((ch = rclex_readch ()) == -1)
775 return -1;
776 if (ch == '\n')
777 ++rc_lineno;
778 }
779 while (ch <= 0x20);
780
781 switch (ch)
782 {
783 case '#':
784 while ((ch = rclex_peekch ()) != -1 && ch != '\n')
785 rclex_readch ();
786 cpp_line ();
787 ch = IGNORED_TOKEN;
788 break;
789
790 case '{':
791 ch = IGNORE_CPP (BEG);
792 break;
793
794 case '}':
795 ch = IGNORE_CPP (END);
796 break;
797
798 case '0': case '1': case '2': case '3': case '4':
799 case '5': case '6': case '7': case '8': case '9':
800 yylval.i.val = read_digit (ch);
801 yylval.i.dword = 0;
802 switch (rclex_peekch ())
803 {
804 case 'l': case 'L':
805 rclex_readch ();
806 yylval.i.dword = 1;
807 break;
808 }
809 ch = IGNORE_CPP (NUMBER);
810 break;
811 case '"':
812 rclex_string ();
813 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
814 if (ch == IGNORED_TOKEN)
815 break;
816 s = handle_quotes (&length);
817 if (! rcdata_mode)
818 yylval.s = s;
819 else
820 {
821 yylval.ss.length = length;
822 yylval.ss.s = s;
823 }
824 break;
825 case 'L': case 'l':
826 if (rclex_peekch () == '"')
827 {
828 rclex_readch ();
829 rclex_string ();
830 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
831 if (ch == IGNORED_TOKEN)
832 break;
833 us = handle_uniquotes (&length);
834 if (! rcdata_mode)
835 yylval.uni = us;
836 else
837 {
838 yylval.suni.length = length;
839 yylval.suni.s = us;
840 }
841 break;
842 }
843 /* Fall through. */
844 default:
845 if (ISIDST (ch) || ch=='$')
846 {
847 while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) || ch == '$' || ch == '.'))
848 rclex_readch ();
849 ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
850 if (ch == STRING)
851 {
852 s = get_string (strlen (rclex_tok) + 1);
853 strcpy (s, rclex_tok);
854 yylval.s = s;
855 }
856 else if (ch == BLOCK)
857 {
858 const char *hs = NULL;
859
860 switch (yylex ())
861 {
862 case STRING:
863 case QUOTEDSTRING:
864 hs = yylval.s;
865 break;
866 case SIZEDSTRING:
867 hs = yylval.s = yylval.ss.s;
868 break;
869 }
870 if (! hs)
871 {
872 rcparse_warning ("BLOCK expects a string as argument.");
873 ch = IGNORED_TOKEN;
874 }
875 else if (! strcmp (hs, "StringFileInfo"))
876 ch = BLOCKSTRINGFILEINFO;
877 else if (! strcmp (hs, "VarFileInfo"))
878 ch = BLOCKVARFILEINFO;
879 }
880 break;
881 }
882 ch = IGNORE_CPP (ch);
883 break;
884 }
885 }
886 while (ch == IGNORED_TOKEN);
887
888 return ch;
889}
This page took 0.156118 seconds and 4 git commands to generate.