2007-07-05 Markus Deuling <deuling@de.ibm.com>
[deliverable/binutils-gdb.git] / binutils / rclex.c
CommitLineData
4a594fce
NC
1/* rclex.c -- lexer for Windows rc files parser */
2
3/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
4 Free Software Foundation, Inc.
5
6 Written by Kai Tietz, Onevision.
7
8 This file is part of GNU Binutils.
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 02110-1301, USA. */
24
/* This is a lexer used by the Windows rc file parser.  It basically
   just recognizes a bunch of keywords.  */
27
28#include "sysdep.h"
29#include "bfd.h"
30#include "bucomm.h"
31#include "libiberty.h"
32#include "safe-ctype.h"
33#include "windres.h"
34#include "rcparse.h"
35
36#include <assert.h>
37
/* Nonzero while we are in rcdata mode, in which we return the lengths
   of strings along with the strings themselves.  */

static int rcdata_mode;

/* Nonzero while we are suppressing lines that come from cpp (including
   windows.h or headers from your C sources may bring in externs and
   typedefs).  When active, we return IGNORED_TOKEN, which lets us ignore
   these outside of resource constructs.  Thus, it isn't required to
   protect all the non-preprocessor lines in your header files with
   #ifdef RC_INVOKED.  It also means your RC file can't include other RC
   files if they're named "*.h".  Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data;

/* Yield token X, or IGNORED_TOKEN while cpp data is being suppressed.  */
#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
54
/* The first filename seen in the cpp output.  It is used to tell
   included files apart from the original file.  */

static char *initial_fn;

/* Node of the singly linked list that records every string handed out
   by get_string, so that they can all be released together.  */

struct alloc_string
{
  struct alloc_string *next;
  char *s;
};

/* Head of the list of allocated strings.  */

static struct alloc_string *strings;
69
70struct rclex_keywords
71{
72 const char *name;
73 int tok;
74};
75
76#define K(KEY) { #KEY, KEY }
77#define KRT(KEY) { #KEY, RT_##KEY }
78
79static const struct rclex_keywords keywds[] =
80{
81 K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
82 K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
83 K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
84 K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
85 K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
86 K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
87 K(DLGINCLUDE), K(DLGINIT),
88 K(EDITTEXT), K(END), K(EXSTYLE),
89 K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
90 K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
91 K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
92 K(HEDIT), K(HELP), K(HTML),
93 K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
94 K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
95 K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
96 K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
97 K(NOINVERT), K(NOT),
98 K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
99 K(PURE), K(PUSHBOX), K(PUSHBUTTON),
100 K(RADIOBUTTON), K(RCDATA), K(RTEXT),
101 K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
102 K(STRINGTABLE), K(STYLE),
103 K(TOOLBAR),
104 K(USERBUTTON),
105 K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
106 K(VIRTKEY), K(VXD),
107 { NULL, 0 },
108};
109
/* External input stream from resrc.  */
extern FILE *cpp_pipe;

/* Lexical scanner state.
   rclex_lastch   one character of push-back, or -1 when empty.
   rclex_tok      NUL-terminated text of the token being scanned.
   rclex_tok_pos  number of characters currently held in rclex_tok.
   rclex_tok_max  number of characters rclex_tok can hold, not counting
                  the terminating NUL.  */
static int rclex_lastch = -1;
static size_t rclex_tok_max = 0;
static size_t rclex_tok_pos = 0;
static char *rclex_tok = NULL;
118
119static int
120rclex_translatekeyword (const char *key)
121{
122 if (key && ISUPPER (key[0]))
123 {
124 const struct rclex_keywords *kw = &keywds[0];
125
126 do
127 {
128 if (! strcmp (kw->name, key))
129 return kw->tok;
130 ++kw;
131 }
132 while (kw->name != NULL);
133 }
134 return STRING;
135}
136
137/* Handle a C preprocessor line. */
138
139static void
140cpp_line (void)
141{
142 const char *s = rclex_tok;
143 int line;
144 char *send, *fn;
d856f2dd 145 size_t len, mlen;
4a594fce
NC
146
147 ++s;
148 while (ISSPACE (*s))
149 ++s;
150
d856f2dd
NC
151 /* Check for #pragma code_page ( DEFAULT | <nr>). */
152 len = strlen (s);
153 mlen = strlen ("pragma");
154 if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
155 {
156 const char *end;
157
158 s += mlen + 1;
159 while (ISSPACE (*s))
160 ++s;
161 len = strlen (s);
162 mlen = strlen ("code_page");
163 if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
164 /* FIXME: We ought to issue a warning message about an unrecognised pragma. */
165 return;
166 s += mlen;
167 while (ISSPACE (*s))
168 ++s;
169 if (*s != '(')
170 /* FIXME: We ought to issue an error message about a malformed pragma. */
171 return;
172 ++s;
173 while (ISSPACE (*s))
174 ++s;
175 if (*s == 0 || (end = strchr (s, ')')) == NULL)
176 /* FIXME: We ought to issue an error message about a malformed pragma. */
177 return;
178 len = (size_t) (end - s);
179 fn = xmalloc (len + 1);
180 if (len)
181 memcpy (fn, s, len);
182 fn[len] = 0;
183 while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
184 fn[--len] = 0;
185 if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
186 wind_current_codepage = wind_default_codepage;
187 else if (len > 0)
188 {
189 rc_uint_type ncp;
190
191 if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
192 ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
193 else
194 ncp = (rc_uint_type) strtol (fn, NULL, 10);
195 if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
196 fatal (_("invalid value specified for pragma code_page.\n"));
197 wind_current_codepage = ncp;
198 }
199 free (fn);
200 return;
201 }
202
4a594fce
NC
203 line = strtol (s, &send, 0);
204 if (*send != '\0' && ! ISSPACE (*send))
205 return;
206
207 /* Subtract 1 because we are about to count the newline. */
208 rc_lineno = line - 1;
209
210 s = send;
211 while (ISSPACE (*s))
212 ++s;
213
214 if (*s != '"')
215 return;
216
217 ++s;
218 send = strchr (s, '"');
219 if (send == NULL)
220 return;
221
222 fn = xmalloc (send - s + 1);
223 strncpy (fn, s, send - s);
224 fn[send - s] = '\0';
225
226 free (rc_filename);
227 rc_filename = fn;
228
229 if (! initial_fn)
230 {
231 initial_fn = xmalloc (strlen (fn) + 1);
232 strcpy (initial_fn, fn);
233 }
234
235 /* Allow the initial file, regardless of name. Suppress all other
236 files if they end in ".h" (this allows included "*.rc"). */
237 if (strcmp (initial_fn, fn) == 0
238 || strcmp (fn + strlen (fn) - 2, ".h") != 0)
239 suppress_cpp_data = 0;
240 else
241 suppress_cpp_data = 1;
242}
243
244/* Allocate a string of a given length. */
245
246static char *
247get_string (int len)
248{
249 struct alloc_string *as;
250
251 as = xmalloc (sizeof *as);
252 as->s = xmalloc (len);
253
254 as->next = strings;
255 strings = as;
256
257 return as->s;
258}
259
260/* Handle a quoted string. The quotes are stripped. A pair of quotes
261 in a string are turned into a single quote. Adjacent strings are
262 merged separated by whitespace are merged, as in C. */
263
264static char *
265handle_quotes (rc_uint_type *len)
266{
267 const char *input = rclex_tok;
268 char *ret, *s;
269 const char *t;
270 int ch;
271 int num_xdigits;
272
273 ret = get_string (strlen (input) + 1);
274
275 s = ret;
276 t = input;
277 if (*t == '"')
278 ++t;
279 while (*t != '\0')
280 {
281 if (*t == '\\')
282 {
283 ++t;
284 switch (*t)
285 {
286 case '\0':
287 rcparse_warning ("backslash at end of string");
288 break;
289
290 case '\"':
291 rcparse_warning ("use \"\" to put \" in a string");
292 *s++ = '"';
293 ++t;
294 break;
295
296 case 'a':
297 *s++ = ESCAPE_B; /* Strange, but true... */
298 ++t;
299 break;
300
301 case 'b':
302 *s++ = ESCAPE_B;
303 ++t;
304 break;
305
306 case 'f':
307 *s++ = ESCAPE_F;
308 ++t;
309 break;
310
311 case 'n':
312 *s++ = ESCAPE_N;
313 ++t;
314 break;
315
316 case 'r':
317 *s++ = ESCAPE_R;
318 ++t;
319 break;
320
321 case 't':
322 *s++ = ESCAPE_T;
323 ++t;
324 break;
325
326 case 'v':
327 *s++ = ESCAPE_V;
328 ++t;
329 break;
330
331 case '\\':
332 *s++ = *t++;
333 break;
334
335 case '0': case '1': case '2': case '3':
336 case '4': case '5': case '6': case '7':
337 ch = *t - '0';
338 ++t;
339 if (*t >= '0' && *t <= '7')
340 {
341 ch = (ch << 3) | (*t - '0');
342 ++t;
343 if (*t >= '0' && *t <= '7')
344 {
345 ch = (ch << 3) | (*t - '0');
346 ++t;
347 }
348 }
349 *s++ = ch;
350 break;
351
352 case 'x': case 'X':
353 ++t;
354 ch = 0;
355 /* We only handle single byte chars here. Make sure
356 we finish an escape sequence like "/xB0ABC" after
357 the first two digits. */
358 num_xdigits = 2;
359 while (num_xdigits--)
360 {
361 if (*t >= '0' && *t <= '9')
362 ch = (ch << 4) | (*t - '0');
363 else if (*t >= 'a' && *t <= 'f')
364 ch = (ch << 4) | (*t - 'a' + 10);
365 else if (*t >= 'A' && *t <= 'F')
366 ch = (ch << 4) | (*t - 'A' + 10);
367 else
368 break;
369 ++t;
370 }
371 *s++ = ch;
372 break;
373
374 default:
375 rcparse_warning ("unrecognized escape sequence");
376 *s++ = '\\';
377 *s++ = *t++;
378 break;
379 }
380 }
381 else if (*t != '"')
382 *s++ = *t++;
383 else if (t[1] == '\0')
384 break;
385 else if (t[1] == '"')
386 {
387 *s++ = '"';
388 t += 2;
389 }
390 else
391 {
392 rcparse_warning ("unexpected character after '\"'");
393 ++t;
394 assert (ISSPACE (*t));
395 while (ISSPACE (*t))
396 {
397 if ((*t) == '\n')
398 ++rc_lineno;
399 ++t;
400 }
401 if (*t == '\0')
402 break;
403 assert (*t == '"');
404 ++t;
405 }
406 }
407
408 *s = '\0';
409
410 *len = s - ret;
411
412 return ret;
413}
414
415/* Allocate a unicode string of a given length. */
416
417static unichar *
418get_unistring (int len)
419{
420 return (unichar *) get_string (len * sizeof (unichar));
421}
422
423/* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
424 in a string are turned into a single quote. Adjacent strings are
425 merged separated by whitespace are merged, as in C. */
426
427static unichar *
428handle_uniquotes (rc_uint_type *len)
429{
430 const char *input = rclex_tok;
431 unichar *ret, *s;
432 const char *t;
433 int ch;
434 int num_xdigits;
435
436 ret = get_unistring (strlen (input) + 1);
437
438 s = ret;
439 t = input;
440 if ((*t == 'L' || *t == 'l') && t[1] == '"')
441 t += 2;
442 else if (*t == '"')
443 ++t;
444 while (*t != '\0')
445 {
446 if (*t == '\\')
447 {
448 ++t;
449 switch (*t)
450 {
451 case '\0':
452 rcparse_warning ("backslash at end of string");
453 break;
454
455 case '\"':
456 rcparse_warning ("use \"\" to put \" in a string");
457 break;
458
459 case 'a':
460 *s++ = ESCAPE_B; /* Strange, but true... */
461 ++t;
462 break;
463
464 case 'b':
465 *s++ = ESCAPE_B;
466 ++t;
467 break;
468
469 case 'f':
470 *s++ = ESCAPE_F;
471 ++t;
472 break;
473
474 case 'n':
475 *s++ = ESCAPE_N;
476 ++t;
477 break;
478
479 case 'r':
480 *s++ = ESCAPE_R;
481 ++t;
482 break;
483
484 case 't':
485 *s++ = ESCAPE_T;
486 ++t;
487 break;
488
489 case 'v':
490 *s++ = ESCAPE_V;
491 ++t;
492 break;
493
494 case '\\':
495 *s++ = (unichar) *t++;
496 break;
497
498 case '0': case '1': case '2': case '3':
499 case '4': case '5': case '6': case '7':
500 ch = *t - '0';
501 ++t;
502 if (*t >= '0' && *t <= '7')
503 {
504 ch = (ch << 3) | (*t - '0');
505 ++t;
506 if (*t >= '0' && *t <= '7')
507 {
508 ch = (ch << 3) | (*t - '0');
509 ++t;
510 }
511 }
512 *s++ = (unichar) ch;
513 break;
514
515 case 'x': case 'X':
516 ++t;
517 ch = 0;
518 /* We only handle two byte chars here. Make sure
519 we finish an escape sequence like "/xB0ABC" after
520 the first two digits. */
521 num_xdigits = 4;
522 while (num_xdigits--)
523 {
524 if (*t >= '0' && *t <= '9')
525 ch = (ch << 4) | (*t - '0');
526 else if (*t >= 'a' && *t <= 'f')
527 ch = (ch << 4) | (*t - 'a' + 10);
528 else if (*t >= 'A' && *t <= 'F')
529 ch = (ch << 4) | (*t - 'A' + 10);
530 else
531 break;
532 ++t;
533 }
534 *s++ = (unichar) ch;
535 break;
536
537 default:
538 rcparse_warning ("unrecognized escape sequence");
539 *s++ = '\\';
540 *s++ = (unichar) *t++;
541 break;
542 }
543 }
544 else if (*t != '"')
545 *s++ = (unichar) *t++;
546 else if (t[1] == '\0')
547 break;
548 else if (t[1] == '"')
549 {
550 *s++ = '"';
551 t += 2;
552 }
553 else
554 {
555 ++t;
556 assert (ISSPACE (*t));
557 while (ISSPACE (*t))
558 {
559 if ((*t) == '\n')
560 ++rc_lineno;
561 ++t;
562 }
563 if (*t == '\0')
564 break;
565 assert (*t == '"');
566 ++t;
567 }
568 }
569
570 *s = '\0';
571
572 *len = s - ret;
573
574 return ret;
575}
576
577/* Discard all the strings we have allocated. The parser calls this
578 when it no longer needs them. */
579
580void
581rcparse_discard_strings (void)
582{
583 struct alloc_string *as;
584
585 as = strings;
586 while (as != NULL)
587 {
588 struct alloc_string *n;
589
590 free (as->s);
591 n = as->next;
592 free (as);
593 as = n;
594 }
595
596 strings = NULL;
597}
598
599/* Enter rcdata mode. */
600void
601rcparse_rcdata (void)
602{
603 rcdata_mode = 1;
604}
605
606/* Go back to normal mode from rcdata mode. */
607void
608rcparse_normal (void)
609{
610 rcdata_mode = 0;
611}
612
613static void
614rclex_tok_add_char (int ch)
615{
616 if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
617 {
618 char *h = xmalloc (rclex_tok_max + 9);
619
620 if (! h)
621 abort ();
622 if (rclex_tok)
623 {
624 memcpy (h, rclex_tok, rclex_tok_pos + 1);
625 free (rclex_tok);
626 }
627 else
628 rclex_tok_pos = 0;
629 rclex_tok_max += 8;
630 rclex_tok = h;
631 }
632 if (ch != -1)
633 rclex_tok[rclex_tok_pos++] = (char) ch;
634 rclex_tok[rclex_tok_pos] = 0;
635}
636
637static int
638rclex_readch (void)
639{
640 int r = -1;
641
642 if ((r = rclex_lastch) != -1)
643 rclex_lastch = -1;
644 else
645 {
646 char ch;
647 do
648 {
649 if (! cpp_pipe || feof (cpp_pipe)
650 || fread (&ch, 1, 1,cpp_pipe) != 1)
651 break;
652 r = ((int) ch) & 0xff;
653 }
654 while (r == 0 || r == '\r');
655 }
656 rclex_tok_add_char (r);
657 return r;
658}
659
660static int
661rclex_peekch (void)
662{
663 int r;
664
665 if ((r = rclex_lastch) == -1)
666 {
667 if ((r = rclex_readch ()) != -1)
668 {
669 rclex_lastch = r;
670 if (rclex_tok_pos > 0)
671 rclex_tok[--rclex_tok_pos] = 0;
672 }
673 }
674 return r;
675}
676
/* Read the rest of a quoted string into the current token; the opening
   quote has already been consumed.  Reading stops after the closing
   quote, or before a newline or end of input, neither of which is
   consumed.  A backslash takes the following character with it, and a
   doubled quote does not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int next = rclex_peekch ();

      if (next == -1 || next == '\n')
        return;

      if (next == '\\')
        {
          /* Consume the backslash and, unless the line or the input
             ends here, the character it escapes.  */
          rclex_readch ();
          next = rclex_peekch ();
          if (next == -1 || next == '\n')
            return;
          rclex_readch ();
          continue;
        }

      if (rclex_readch () != '"')
        continue;

      /* A quote: it closes the string unless another quote follows.  */
      if (rclex_peekch () != '"')
        return;
      rclex_readch ();
    }
}
702
703static rc_uint_type
704read_digit (int ch)
705{
706 rc_uint_type base = 10;
707 rc_uint_type ret, val;
708 int warned = 0;
709
710 ret = 0;
711 if (ch == '0')
712 {
713 base = 8;
714 switch (rclex_peekch ())
715 {
716 case 'o': case 'O':
717 rclex_readch ();
718 base = 8;
719 break;
720
721 case 'x': case 'X':
722 rclex_readch ();
723 base = 16;
724 break;
725 }
726 }
727 else
728 ret = (rc_uint_type) (ch - '0');
729 while ((ch = rclex_peekch ()) != -1)
730 {
731 if (ISDIGIT (ch))
732 val = (rc_uint_type) (ch - '0');
733 else if (ch >= 'a' && ch <= 'f')
734 val = (rc_uint_type) ((ch - 'a') + 10);
735 else if (ch >= 'A' && ch <= 'F')
736 val = (rc_uint_type) ((ch - 'A') + 10);
737 else
738 break;
739 rclex_readch ();
740 if (! warned && val >= base)
741 {
742 warned = 1;
743 rcparse_warning ("digit exceeds base");
744 }
745 ret *= base;
746 ret += val;
747 }
748 return ret;
749}
750
751/* yyparser entry method. */
752
753int
754yylex (void)
755{
756 char *s;
757 unichar *us;
758 rc_uint_type length;
759 int ch;
760
761 /* Make sure that rclex_tok is initialized. */
762 if (! rclex_tok)
763 rclex_tok_add_char (-1);
764
765 do
766 {
767 do
768 {
769 /* Clear token. */
770 rclex_tok_pos = 0;
771 rclex_tok[0] = 0;
772
773 if ((ch = rclex_readch ()) == -1)
774 return -1;
775 if (ch == '\n')
776 ++rc_lineno;
777 }
778 while (ch <= 0x20);
779
780 switch (ch)
781 {
782 case '#':
783 while ((ch = rclex_peekch ()) != -1 && ch != '\n')
784 rclex_readch ();
785 cpp_line ();
786 ch = IGNORED_TOKEN;
787 break;
788
789 case '{':
790 ch = IGNORE_CPP (BEG);
791 break;
792
793 case '}':
794 ch = IGNORE_CPP (END);
795 break;
796
797 case '0': case '1': case '2': case '3': case '4':
798 case '5': case '6': case '7': case '8': case '9':
799 yylval.i.val = read_digit (ch);
800 yylval.i.dword = 0;
801 switch (rclex_peekch ())
802 {
803 case 'l': case 'L':
804 rclex_readch ();
805 yylval.i.dword = 1;
806 break;
807 }
808 ch = IGNORE_CPP (NUMBER);
809 break;
810 case '"':
811 rclex_string ();
812 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
813 if (ch == IGNORED_TOKEN)
814 break;
815 s = handle_quotes (&length);
816 if (! rcdata_mode)
817 yylval.s = s;
818 else
819 {
820 yylval.ss.length = length;
821 yylval.ss.s = s;
822 }
823 break;
824 case 'L': case 'l':
825 if (rclex_peekch () == '"')
826 {
827 rclex_readch ();
828 rclex_string ();
829 ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
830 if (ch == IGNORED_TOKEN)
831 break;
832 us = handle_uniquotes (&length);
833 if (! rcdata_mode)
834 yylval.uni = us;
835 else
836 {
837 yylval.suni.length = length;
838 yylval.suni.s = us;
839 }
840 break;
841 }
842 /* Fall through. */
843 default:
844 if (ISIDST (ch) || ch=='$')
845 {
846 while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) || ch == '$' || ch == '.'))
847 rclex_readch ();
848 ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
849 if (ch == STRING)
850 {
851 s = get_string (strlen (rclex_tok) + 1);
852 strcpy (s, rclex_tok);
853 yylval.s = s;
854 }
855 else if (ch == BLOCK)
856 {
857 const char *hs = NULL;
858
859 switch (yylex ())
860 {
861 case STRING:
862 case QUOTEDSTRING:
863 hs = yylval.s;
864 break;
865 case SIZEDSTRING:
866 hs = yylval.s = yylval.ss.s;
867 break;
868 }
869 if (! hs)
870 {
871 rcparse_warning ("BLOCK expects a string as argument.");
872 ch = IGNORED_TOKEN;
873 }
874 else if (! strcmp (hs, "StringFileInfo"))
875 ch = BLOCKSTRINGFILEINFO;
876 else if (! strcmp (hs, "VarFileInfo"))
877 ch = BLOCKVARFILEINFO;
878 }
879 break;
880 }
881 ch = IGNORE_CPP (ch);
882 break;
883 }
884 }
885 while (ch == IGNORED_TOKEN);
886
887 return ch;
888}
This page took 0.08225 seconds and 4 git commands to generate.