[deliverable/binutils-gdb.git] / binutils / rclex.c

/* rclex.c -- lexer for Windows rc files parser  */

/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.

   Written by Kai Tietz, Onevision.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* This is a lexer used by the Windows rc file parser.  It basically
   just recognizes a bunch of keywords.  */

#include "sysdep.h"
#include "bfd.h"
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "windres.h"
#include "rcparse.h"

#include <assert.h>

/* Whether we are in rcdata mode, in which we return the lengths of
   strings.  */

static int rcdata_mode;

/* Whether we are suppressing lines from cpp (including windows.h or
   headers from your C sources may bring in externs and typedefs).
   When active, we return IGNORED_TOKEN, which lets us ignore these
   outside of resource constructs.  Thus, it isn't required to protect
   all the non-preprocessor lines in your header files with #ifdef
   RC_INVOKED.  It also means your RC file can't include other RC
   files if they're named "*.h".  Sorry.  Name them *.rch or whatever.  */

static int suppress_cpp_data;

#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))

/* The first filename we detect in the cpp output.  We use this to
   tell included files from the original file.  */

static char *initial_fn;

/* List of allocated strings.  Every buffer handed out by get_string
   is recorded in a node of this singly linked list so that
   rcparse_discard_strings can free them all in one sweep.  */

struct alloc_string
{
  /* Next node in the list, or NULL for the last one.  */
  struct alloc_string *next;
  /* The buffer that was handed out.  */
  char *s;
};

/* Head of the list; get_string pushes new nodes at the front.  */
static struct alloc_string *strings;

/* One entry of the keyword table: the spelling of a keyword and the
   parser token that yylex returns for it.  */
struct rclex_keywords
{
  const char *name;
  int tok;
};

/* K(FOO) maps the spelling "FOO" to the token FOO.  KRT(FOO) maps the
   spelling "FOO" to the token RT_FOO.  */
#define K(KEY)  { #KEY, KEY }
#define KRT(KEY)  { #KEY, RT_##KEY }

/* The keyword table, terminated by an entry whose name is NULL.
   rclex_translatekeyword compares names with strcmp, so matching is
   case sensitive.  "BEGIN" and "VERSION" are spelled out because
   their tokens (BEG, VERSIONK) are not named after the keyword.  */
static const struct rclex_keywords keywds[] =
{
  K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
  K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
  K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
  K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
  K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
  K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
  K(DLGINCLUDE), K(DLGINIT),
  K(EDITTEXT), K(END), K(EXSTYLE),
  K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
  K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
  K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
  K(HEDIT), K(HELP), K(HTML),
  K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
  K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
  K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
  K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
  K(NOINVERT), K(NOT),
  K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
  K(PURE), K(PUSHBOX), K(PUSHBUTTON),
  K(RADIOBUTTON), K(RCDATA), K(RTEXT),
  K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
  K(STRINGTABLE), K(STYLE),
  K(TOOLBAR),
  K(USERBUTTON),
  K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
  K(VIRTKEY), K(VXD),
  { NULL, 0 },
};

/* External input stream from resrc.  rclex_readch reads the
   characters to scan from it, one byte at a time.  */
extern FILE *cpp_pipe;

/* Lexical scanner helpers.  */

/* One character of push-back set by rclex_peekch and consumed by the
   next rclex_readch, or -1 when nothing is pending.  */
static int rclex_lastch = -1;
/* Number of characters rclex_tok can hold, not counting the
   terminating NUL.  */
static size_t rclex_tok_max = 0;
/* Number of characters currently stored in rclex_tok.  */
static size_t rclex_tok_pos = 0;
/* NUL terminated text of the token being scanned; NULL until the
   first call of rclex_tok_add_char.  */
static char *rclex_tok = NULL;

/* Return the parser token for the identifier KEY: the token of the
   matching keywds entry, or STRING when KEY is NULL or is not a
   keyword.  */

static int
rclex_translatekeyword (const char *key)
{
  const struct rclex_keywords *entry;

  /* All keywords start with an upper case letter, so anything else
     can be classified without walking the table.  */
  if (key == NULL || ! ISUPPER (key[0]))
    return STRING;

  for (entry = keywds; entry->name != NULL; ++entry)
    if (strcmp (entry->name, key) == 0)
      return entry->tok;

  return STRING;
}

/* Handle a C preprocessor line.  The whole line, starting with its
   '#', is expected in rclex_tok.  Two forms are understood:

     #pragma code_page ( DEFAULT | <nr> )
       Selects wind_current_codepage.  An empty argument or DEFAULT
       restores wind_default_codepage; an invalid number is reported
       through fatal.

     # <line> "<file>" ...
       Updates rc_lineno and rc_filename, remembers the first file
       name seen in initial_fn, and sets suppress_cpp_data for every
       file other than the initial one whose name ends in ".h".

   Anything else is silently ignored.  */

static void
cpp_line (void)
{
  const char *s = rclex_tok;
  int line;
  char *send, *fn;
  size_t len, mlen;

  /* Skip the '#' and any white space after it.  */
  ++s;
  while (ISSPACE (*s))
    ++s;

  /* Check for #pragma code_page ( DEFAULT | <nr>).  */
  len = strlen (s);
  mlen = strlen ("pragma");
  if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
    {
      const char *end;

      s += mlen + 1;
      while (ISSPACE (*s))
	++s;
      len = strlen (s);
      mlen = strlen ("code_page");
      if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
	/* FIXME: We ought to issue a warning message about an unrecognised pragma.  */
	return;
      s += mlen;
      while (ISSPACE (*s))
	++s;
      if (*s != '(')
	/* FIXME: We ought to issue an error message about a malformed pragma.  */
	return;
      ++s;
      while (ISSPACE (*s))
	++s;
      if (*s == 0 || (end = strchr (s, ')')) == NULL)
	/* FIXME: We ought to issue an error message about a malformed pragma.  */
	return;

      /* Copy the argument between the parentheses and strip trailing
	 blanks and control characters from it.  */
      len = (size_t) (end - s);
      fn = xmalloc (len + 1);
      if (len)
	memcpy (fn, s, len);
      fn[len] = 0;
      while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
	fn[--len] = 0;
      if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
	wind_current_codepage = wind_default_codepage;
      else if (len > 0)
	{
	  rc_uint_type ncp;

	  /* The code page number may be hexadecimal ("0x...") or
	     decimal.  */
	  if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
	      ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
	  else
	      ncp = (rc_uint_type) strtol (fn, NULL, 10);
	  if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
	    fatal (_("invalid value specified for pragma code_page.\n"));
	  wind_current_codepage = ncp;
	}
      free (fn);
      return;
    }

  /* Otherwise only line markers of the form  # <line> "<file>"  are
     of interest.  A directive without any number must not disturb
     the line counter.  */
  line = strtol (s, &send, 0);
  if (send == s || (*send != '\0' && ! ISSPACE (*send)))
    return;

  /* Subtract 1 because we are about to count the newline.  */
  rc_lineno = line - 1;

  s = send;
  while (ISSPACE (*s))
    ++s;

  if (*s != '"')
    return;

  ++s;
  send = strchr (s, '"');
  if (send == NULL)
    return;

  /* Copy the text between the quotes.  It cannot contain a NUL, so
     LEN is also the length of the resulting string.  */
  len = (size_t) (send - s);
  fn = xmalloc (len + 1);
  memcpy (fn, s, len);
  fn[len] = '\0';

  free (rc_filename);
  rc_filename = fn;

  if (! initial_fn)
    {
      initial_fn = xmalloc (len + 1);
      strcpy (initial_fn, fn);
    }

  /* Allow the initial file, regardless of name.  Suppress all other
     files if they end in ".h" (this allows included "*.rc").  A name
     shorter than two characters cannot end in ".h"; checking the
     length first avoids reading in front of the buffer.  */
  if (strcmp (initial_fn, fn) == 0
      || len < 2
      || strcmp (fn + len - 2, ".h") != 0)
    suppress_cpp_data = 0;
  else
    suppress_cpp_data = 1;
}

/* Return a newly allocated buffer of LEN bytes.  The buffer is put at
   the front of the `strings' list and stays valid until
   rcparse_discard_strings is called.  */

static char *
get_string (int len)
{
  struct alloc_string *node = xmalloc (sizeof (struct alloc_string));

  node->s = xmalloc (len);
  node->next = strings;
  strings = node;

  return node->s;
}

/* Handle the quoted string held in rclex_tok.  The quotes are
   stripped, backslash escapes are translated and a pair of quotes
   inside the string is turned into a single quote.  Adjacent strings
   separated by whitespace are merged, as in C.  The result lives in a
   buffer obtained from get_string; its length, not counting the
   terminating NUL, is stored in *LEN.  */

static char *
handle_quotes (rc_uint_type *len)
{
  const char *src = rclex_tok;
  char *buf, *out;
  int value;
  int remaining;

  /* No translation produces more output than it consumes input, so
     the length of the token plus a terminator is always enough.  */
  buf = get_string (strlen (src) + 1);
  out = buf;

  /* Step over the opening quote.  */
  if (*src == '"')
    ++src;

  while (*src != '\0')
    {
      if (*src == '\\')
	{
	  ++src;
	  switch (*src)
	    {
	    case '\0':
	      /* Leave SRC on the NUL so that the loop terminates.  */
	      rcparse_warning ("backslash at end of string");
	      break;

	    case '\"':
	      rcparse_warning ("use \"\" to put \" in a string");
	      *out++ = '"';
	      ++src;
	      break;

	    case 'a': /* Strange, but true: same as \b.  */
	    case 'b':
	      *out++ = ESCAPE_B;
	      ++src;
	      break;

	    case 'f':
	      *out++ = ESCAPE_F;
	      ++src;
	      break;

	    case 'n':
	      *out++ = ESCAPE_N;
	      ++src;
	      break;

	    case 'r':
	      *out++ = ESCAPE_R;
	      ++src;
	      break;

	    case 't':
	      *out++ = ESCAPE_T;
	      ++src;
	      break;

	    case 'v':
	      *out++ = ESCAPE_V;
	      ++src;
	      break;

	    case '\\':
	      *out++ = '\\';
	      ++src;
	      break;

	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
	      /* One octal digit, followed by at most two more.  */
	      value = *src++ - '0';
	      for (remaining = 2;
		   remaining > 0 && *src >= '0' && *src <= '7';
		   --remaining)
		value = (value << 3) | (*src++ - '0');
	      *out++ = value;
	      break;

	    case 'x': case 'X':
	      ++src;
	      value = 0;
	      /* We only handle single byte chars here.  Make sure
		 we finish an escape sequence like "\xB0ABC" after
		 the first two digits.  */
	      for (remaining = 2; remaining > 0; --remaining, ++src)
		{
		  if (*src >= '0' && *src <= '9')
		    value = (value << 4) | (*src - '0');
		  else if (*src >= 'a' && *src <= 'f')
		    value = (value << 4) | (*src - 'a' + 10);
		  else if (*src >= 'A' && *src <= 'F')
		    value = (value << 4) | (*src - 'A' + 10);
		  else
		    break;
		}
	      *out++ = value;
	      break;

	    default:
	      /* Keep both the backslash and the character.  */
	      rcparse_warning ("unrecognized escape sequence");
	      *out++ = '\\';
	      *out++ = *src++;
	      break;
	    }
	  continue;
	}

      if (*src != '"')
	{
	  *out++ = *src++;
	  continue;
	}

      /* SRC is on a quote: end of the token, a doubled quote, or the
	 seam between two adjacent strings.  */
      if (src[1] == '\0')
	break;

      if (src[1] == '"')
	{
	  *out++ = '"';
	  src += 2;
	  continue;
	}

      rcparse_warning ("unexpected character after '\"'");
      ++src;
      assert (ISSPACE (*src));
      for (; ISSPACE (*src); ++src)
	if (*src == '\n')
	  ++rc_lineno;
      if (*src == '\0')
	break;
      assert (*src == '"');
      ++src;
    }

  *out = '\0';

  *len = out - buf;

  return buf;
}

/* Return a buffer with room for LEN unichar elements.  The storage
   comes from get_string and is released together with all other
   strings obtained from it.  */

static unichar *
get_unistring (int len)
{
  char *raw = get_string (len * sizeof (unichar));

  return (unichar *) raw;
}

/* Handle the quoted unicode string held in rclex_tok, which may start
   with an L or l prefix.  The quotes are stripped, backslash escapes
   are translated and a pair of quotes inside the string is turned
   into a single quote.  Adjacent strings separated by whitespace are
   merged, as in C.  Every input character becomes one unichar.  The
   result lives in a buffer obtained from get_unistring; its length in
   unichars, not counting the terminating zero, is stored in *LEN.  */

static unichar *
handle_uniquotes (rc_uint_type *len)
{
  const char *input = rclex_tok;
  unichar *ret, *s;
  const char *t;
  int ch;
  int num_xdigits;

  /* No translation produces more elements than it consumes input
     characters, so the token length plus a terminator is enough.  */
  ret = get_unistring (strlen (input) + 1);

  s = ret;
  t = input;
  /* Step over the optional L prefix and the opening quote.  */
  if ((*t == 'L' || *t == 'l') && t[1] == '"')
    t += 2;
  else if (*t == '"')
    ++t;
  while (*t != '\0')
    {
      if (*t == '\\')
	{
	  ++t;
	  switch (*t)
	    {
	    case '\0':
	      /* Leave T on the NUL so that the loop terminates.  */
	      rcparse_warning ("backslash at end of string");
	      break;

	    case '\"':
	      /* Emit the quote and consume it, exactly as handle_quotes
		 does.  If the quote were left in place it would be taken
		 for the end of the string.  */
	      rcparse_warning ("use \"\" to put \" in a string");
	      *s++ = '"';
	      ++t;
	      break;

	    case 'a':
	      *s++ = ESCAPE_B; /* Strange, but true...  */
	      ++t;
	      break;

	    case 'b':
	      *s++ = ESCAPE_B;
	      ++t;
	      break;

	    case 'f':
	      *s++ = ESCAPE_F;
	      ++t;
	      break;

	    case 'n':
	      *s++ = ESCAPE_N;
	      ++t;
	      break;

	    case 'r':
	      *s++ = ESCAPE_R;
	      ++t;
	      break;

	    case 't':
	      *s++ = ESCAPE_T;
	      ++t;
	      break;

	    case 'v':
	      *s++ = ESCAPE_V;
	      ++t;
	      break;

	    case '\\':
	      *s++ = (unichar) *t++;
	      break;

	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
	      /* One octal digit, followed by at most two more.  */
	      ch = *t - '0';
	      ++t;
	      if (*t >= '0' && *t <= '7')
		{
		  ch = (ch << 3) | (*t - '0');
		  ++t;
		  if (*t >= '0' && *t <= '7')
		    {
		      ch = (ch << 3) | (*t - '0');
		      ++t;
		    }
		}
	      *s++ = (unichar) ch;
	      break;

	    case 'x': case 'X':
	      ++t;
	      ch = 0;
	      /* We only handle two byte chars here.  Make sure
		 we finish an escape sequence like "\xB0ABCDE" after
		 the first four digits.  */
	      num_xdigits = 4;
	      while (num_xdigits--)
		{
		  if (*t >= '0' && *t <= '9')
		    ch = (ch << 4) | (*t - '0');
		  else if (*t >= 'a' && *t <= 'f')
		    ch = (ch << 4) | (*t - 'a' + 10);
		  else if (*t >= 'A' && *t <= 'F')
		    ch = (ch << 4) | (*t - 'A' + 10);
		  else
		    break;
		  ++t;
		}
	      *s++ = (unichar) ch;
	      break;

	    default:
	      /* Keep both the backslash and the character.  */
	      rcparse_warning ("unrecognized escape sequence");
	      *s++ = '\\';
	      *s++ = (unichar) *t++;
	      break;
	    }
	}
      else if (*t != '"')
	*s++ = (unichar) *t++;
      else if (t[1] == '\0')
	/* Closing quote of the token.  */
	break;
      else if (t[1] == '"')
	{
	  /* A doubled quote stands for one quote character.  */
	  *s++ = '"';
	  t += 2;
	}
      else
	{
	  /* Seam between two adjacent strings: skip the white space
	     up to the next opening quote, counting lines.  */
	  ++t;
	  assert (ISSPACE (*t));
	  while (ISSPACE (*t))
	    {
	      if ((*t) == '\n')
		++rc_lineno;
	      ++t;
	    }
	  if (*t == '\0')
	    break;
	  assert (*t == '"');
	  ++t;
	}
    }

  *s = '\0';

  *len = s - ret;

  return ret;
}

/* Free every buffer handed out by get_string, together with its list
   node, and leave the list empty.  The parser calls this once it no
   longer needs the strings.  */

void
rcparse_discard_strings (void)
{
  struct alloc_string *node = strings;

  while (node != NULL)
    {
      struct alloc_string *following = node->next;

      free (node->s);
      free (node);
      node = following;
    }

  strings = NULL;
}

/* Switch the lexer to rcdata mode: from now on yylex reports quoted
   strings as SIZEDSTRING / SIZEDUNISTRING, with their lengths.  */
void
rcparse_rcdata (void)
{
  rcdata_mode = 1;
}

/* Leave rcdata mode: yylex reports quoted strings as QUOTEDSTRING /
   QUOTEDUNISTRING again.  */
void
rcparse_normal (void)
{
  rcdata_mode = 0;
}

/* Append CH to the token buffer rclex_tok and keep it NUL terminated.
   The buffer is created on first use and enlarged by eight characters
   whenever it is full.  Passing -1 appends nothing; it only makes
   sure that the buffer exists and is terminated.  */

static void
rclex_tok_add_char (int ch)
{
  int need_room = (rclex_tok == NULL || rclex_tok_pos >= rclex_tok_max);

  if (need_room)
    {
      /* Room for the new capacity plus the terminating NUL.  */
      char *grown = xmalloc (rclex_tok_max + 9);

      if (grown == NULL)
	abort ();
      if (rclex_tok == NULL)
	rclex_tok_pos = 0;
      else
	{
	  /* Carry over the current contents including the NUL.  */
	  memcpy (grown, rclex_tok, rclex_tok_pos + 1);
	  free (rclex_tok);
	}
      rclex_tok = grown;
      rclex_tok_max += 8;
    }

  if (ch != -1)
    {
      rclex_tok[rclex_tok_pos] = (char) ch;
      ++rclex_tok_pos;
    }
  rclex_tok[rclex_tok_pos] = 0;
}

/* Read the next input character, append it to the token buffer and
   return it.  A character pushed back by rclex_peekch is delivered
   first; otherwise characters are read from cpp_pipe, skipping NUL
   and carriage return.  Returns -1 when no more input is available;
   in that case nothing is appended to the token.  */

static int
rclex_readch (void)
{
  int r = rclex_lastch;

  if (r != -1)
    rclex_lastch = -1;
  else
    {
      unsigned char ch;

      do
	{
	  if (! cpp_pipe || feof (cpp_pipe)
	      || fread (&ch, 1, 1, cpp_pipe) != 1)
	    {
	      /* End of input.  Forget a NUL or CR skipped just before
		 it so that it is not returned as a character.  */
	      r = -1;
	      break;
	    }
	  r = (int) ch;
	}
      while (r == 0 || r == '\r');
    }
  rclex_tok_add_char (r);
  return r;
}

/* Return the next input character without consuming it, or -1 when
   no more input is available.  The character is fetched with
   rclex_readch, remembered in rclex_lastch and taken out of the token
   buffer again, so that the next rclex_readch delivers it.  */

static int
rclex_peekch (void)
{
  int c;

  /* A character is already pending.  */
  if (rclex_lastch != -1)
    return rclex_lastch;

  c = rclex_readch ();
  if (c == -1)
    return -1;

  rclex_lastch = c;
  if (rclex_tok_pos > 0)
    {
      --rclex_tok_pos;
      rclex_tok[rclex_tok_pos] = 0;
    }
  return c;
}

/* Collect the remainder of a quoted string into the token buffer; the
   opening quote has already been read by the caller.  Scanning stops
   after the closing quote, or in front of a newline or the end of
   input.  A backslash takes the following character with it and a
   doubled quote does not end the string.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int next = rclex_peekch ();

      if (next == -1 || next == '\n')
	return;

      if (next == '\\')
	{
	  /* Take the backslash, then the escaped character unless the
	     line or the input ends here.  */
	  rclex_readch ();
	  next = rclex_peekch ();
	  if (next == -1 || next == '\n')
	    return;
	  rclex_readch ();
	  continue;
	}

      if (rclex_readch () != '"')
	continue;

      /* A quote was consumed: a second one makes it a doubled quote,
	 anything else means the string is complete.  */
      if (rclex_peekch () != '"')
	return;
      rclex_readch ();
    }
}

static rc_uint_type
read_digit (int ch)
{
  rc_uint_type base = 10;
  rc_uint_type ret, val;
  int warned = 0;

  ret = 0;
  if (ch == '0')
    {
      base = 8;
      switch (rclex_peekch ())
	{
	case 'o': case 'O':
	  rclex_readch ();
	  base = 8;
	  break;

	case 'x': case 'X':
	  rclex_readch ();
	  base = 16;
	  break;
	}
    }
  else
    ret = (rc_uint_type) (ch - '0');
  while ((ch = rclex_peekch ()) != -1)
    {
      if (ISDIGIT (ch))
	val = (rc_uint_type) (ch - '0');
      else if (ch >= 'a' && ch <= 'f')
	val = (rc_uint_type) ((ch - 'a') + 10);
      else if (ch >= 'A' && ch <= 'F')
	val = (rc_uint_type) ((ch - 'A') + 10);
      else
	break;
      rclex_readch ();
      if (! warned && val >= base)
	{
	  warned = 1;
	  rcparse_warning ("digit exceeds base");
	}
      ret *= base;
      ret += val;
    }
  return ret;
}

/* yyparser entry method.  Scan the next token from the input, store
   its value (if any) in yylval and return its token code, or -1 at
   the end of the input.  Tokens that evaluate to IGNORED_TOKEN -
   preprocessor lines and everything scanned while suppress_cpp_data
   is set - are skipped here and never returned.  */

int
yylex (void)
{
  char *s;
  unichar *us;
  rc_uint_type length;
  int ch;

  /* Make sure that rclex_tok is initialized.  */
  if (! rclex_tok)
    rclex_tok_add_char (-1);

  do
    {
      /* Skip blanks and control characters, counting newlines.  The
	 token buffer is reset before every character so that it ends
	 up holding only the first character of the token.  */
      do
	{
	  /* Clear token.  */
	  rclex_tok_pos = 0;
	  rclex_tok[0] = 0;
	  
	  if ((ch = rclex_readch ()) == -1)
	    return -1;
	  if (ch == '\n')
	    ++rc_lineno;
	}
      while (ch <= 0x20);

      switch (ch)
	{
	case '#':
	  /* Preprocessor line: collect it up to, but not including,
	     the newline and hand it to cpp_line.  */
	  while ((ch = rclex_peekch ()) != -1 && ch != '\n')
	    rclex_readch ();
	  cpp_line ();
	  ch = IGNORED_TOKEN;
	  break;
	
	case '{':
	  /* Braces are synonyms for BEGIN and END.  */
	  ch = IGNORE_CPP (BEG);
	  break;
	
	case '}':
	  ch = IGNORE_CPP (END);
	  break;
	
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  /* A number; a trailing l or L marks it as a dword.  */
	  yylval.i.val = read_digit (ch);
	  yylval.i.dword = 0;
	  switch (rclex_peekch ())
	    {
	    case 'l': case 'L':
	      rclex_readch ();
	      yylval.i.dword = 1;
	      break;
	    }
	  ch = IGNORE_CPP (NUMBER);
	  break;
	case '"':
	  /* A quoted string.  In rcdata mode the length is reported
	     along with the text.  */
	  rclex_string ();
	  ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
	  if (ch == IGNORED_TOKEN)
	    break;
	  s = handle_quotes (&length);
	  if (! rcdata_mode)
	    yylval.s = s;
	  else
	    {
	      yylval.ss.length = length;
	      yylval.ss.s = s;
	  }
	  break;
	case 'L': case 'l':
	  /* L"..." is a unicode string; any other L or l starts an
	     ordinary identifier.  */
	  if (rclex_peekch () == '"')
	    {
	      rclex_readch ();
	      rclex_string ();
	      ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
	      if (ch == IGNORED_TOKEN)
		break;
	      us = handle_uniquotes (&length);
	      if (! rcdata_mode)
		yylval.uni = us;
	      else
	        {
		  yylval.suni.length = length;
		  yylval.suni.s = us;
	      }
	      break;
	    }
	  /* Fall through.  */
	default:
	  if (ISIDST (ch) || ch=='$')
	    {
	      /* An identifier, which may also contain '$' and '.'.  It
		 is either a keyword or a plain STRING.  */
	      while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) || ch == '$' || ch == '.'))
		rclex_readch ();
	      ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
	      if (ch == STRING)
		{
		  /* Hand the parser its own copy of the token text.  */
		  s = get_string (strlen (rclex_tok) + 1);
		  strcpy (s, rclex_tok);
		  yylval.s = s;
		}
	      else if (ch == BLOCK)
		{
		  const char *hs = NULL;

		  /* BLOCK is followed by a string naming the block.
		     Scan that string right away and fold it into the
		     token for the two names known here; for any other
		     name the token stays BLOCK and yylval.s holds the
		     name.  */
		  switch (yylex ())
		  {
		  case STRING:
		  case QUOTEDSTRING:
		    hs = yylval.s;
		    break;
		  case SIZEDSTRING:
		    hs = yylval.s = yylval.ss.s;
		    break;
		  }
		  if (! hs)
		    {
		      rcparse_warning ("BLOCK expects a string as argument.");
		      ch = IGNORED_TOKEN;
		    }
		  else if (! strcmp (hs, "StringFileInfo"))
		    ch = BLOCKSTRINGFILEINFO;
		  else if (! strcmp (hs, "VarFileInfo"))
		    ch = BLOCKVARFILEINFO;
		}
	      break;
	    }
	  /* Any other character is a token of its own.  */
	  ch = IGNORE_CPP (ch);
	  break;
	}
    }
  while (ch == IGNORED_TOKEN);

  return ch;
}
Commit	Line	Data
4a594fce NC	1	/* rclex.c -- lexer for Windows rc files parser */
	2
	3	/* Copyright 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2007
	4	Free Software Foundation, Inc.
	5
	6	Written by Kai Tietz, Onevision.
	7
	8	This file is part of GNU Binutils.
	9
	10	This program is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2 of the License, or
	13	(at your option) any later version.
	14
	15	This program is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with this program; if not, write to the Free Software
	22	Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
	23	02110-1301, USA. */
	24
	25	/* This is a lexer used by the Windows rc file parser. It basically
	26	just recognized a bunch of keywords. */
	27
	28	#include "sysdep.h"
	29	#include "bfd.h"
	30	#include "bucomm.h"
	31	#include "libiberty.h"
	32	#include "safe-ctype.h"
	33	#include "windres.h"
	34	#include "rcparse.h"
	35
	36	#include <assert.h>
	37
	38	/* Whether we are in rcdata mode, in which we returns the lengths of
	39	strings. */
	40
	41	static int rcdata_mode;
	42
	43	/* Whether we are supressing lines from cpp (including windows.h or
	44	headers from your C sources may bring in externs and typedefs).
	45	When active, we return IGNORED_TOKEN, which lets us ignore these
	46	outside of resource constructs. Thus, it isn't required to protect
	47	all the non-preprocessor lines in your header files with #ifdef
	48	RC_INVOKED. It also means your RC file can't include other RC
	49	files if they're named ".h". Sorry. Name them .rch or whatever. */
	50
	51	static int suppress_cpp_data;
	52
	53	#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
	54
	55	/* The first filename we detect in the cpp output. We use this to
	56	tell included files from the original file. */
	57
	58	static char *initial_fn;
	59
	60	/* List of allocated strings. */
	61
	62	struct alloc_string
	63	{
	64	struct alloc_string *next;
65	char *s;
66	};
67
68	static struct alloc_string *strings;
69
70	struct rclex_keywords
71	{
72	const char *name;
73	int tok;
74	};
75
76	#define K(KEY) { #KEY, KEY }
77	#define KRT(KEY) { #KEY, RT_##KEY }
78
79	static const struct rclex_keywords keywds[] =
80	{
81	K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
82	K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
83	K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
84	K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
85	K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
86	K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
87	K(DLGINCLUDE), K(DLGINIT),
88	K(EDITTEXT), K(END), K(EXSTYLE),
89	K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
90	K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
91	K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
92	K(HEDIT), K(HELP), K(HTML),
93	K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
94	K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
95	K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
96	K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
97	K(NOINVERT), K(NOT),
98	K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
99	K(PURE), K(PUSHBOX), K(PUSHBUTTON),
100	K(RADIOBUTTON), K(RCDATA), K(RTEXT),
101	K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
102	K(STRINGTABLE), K(STYLE),
103	K(TOOLBAR),
104	K(USERBUTTON),
105	K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
106	K(VIRTKEY), K(VXD),
107	{ NULL, 0 },
108	};
109
110	/* External input stream from resrc */
111	extern FILE *cpp_pipe;
112
113	/* Lexical scanner helpers. */
114	static int rclex_lastch = -1;
115	static size_t rclex_tok_max = 0;
116	static size_t rclex_tok_pos = 0;
117	static char *rclex_tok = NULL;
118
119	static int
120	rclex_translatekeyword (const char *key)
121	{
122	if (key && ISUPPER (key[0]))
123	{
124	const struct rclex_keywords *kw = &keywds[0];
125
126	do
127	{
128	if (! strcmp (kw->name, key))
129	return kw->tok;
130	++kw;
131	}
132	while (kw->name != NULL);
133	}
134	return STRING;
135	}
136
137	/* Handle a C preprocessor line. */
138
139	static void
140	cpp_line (void)
141	{
142	const char *s = rclex_tok;
143	int line;
144	char send, fn;
d856f2dd	145	size_t len, mlen;
4a594fce NC	146
	147	++s;
	148	while (ISSPACE (*s))
	149	++s;
	150
d856f2dd NC	151	/* Check for #pragma code_page ( DEFAULT \| <nr>). */
	152	len = strlen (s);
	153	mlen = strlen ("pragma");
	154	if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
	155	{
	156	const char *end;
	157
	158	s += mlen + 1;
	159	while (ISSPACE (*s))
	160	++s;
	161	len = strlen (s);
	162	mlen = strlen ("code_page");
	163	if (len <= mlen \|\| memcmp (s, "code_page", mlen) != 0)
	164	/* FIXME: We ought to issue a warning message about an unrecognised pragma. */
	165	return;
	166	s += mlen;
	167	while (ISSPACE (*s))
	168	++s;
	169	if (*s != '(')
	170	/* FIXME: We ought to issue an error message about a malformed pragma. */
	171	return;
	172	++s;
	173	while (ISSPACE (*s))
	174	++s;
	175	if (*s == 0 \|\| (end = strchr (s, ')')) == NULL)
	176	/* FIXME: We ought to issue an error message about a malformed pragma. */
	177	return;
	178	len = (size_t) (end - s);
	179	fn = xmalloc (len + 1);
	180	if (len)
	181	memcpy (fn, s, len);
	182	fn[len] = 0;
	183	while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
	184	fn[--len] = 0;
	185	if (! len \|\| (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
	186	wind_current_codepage = wind_default_codepage;
	187	else if (len > 0)
	188	{
	189	rc_uint_type ncp;
	190
	191	if (fn[0] == '0' && (fn[1] == 'x' \|\| fn[1] == 'X'))
	192	ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
	193	else
	194	ncp = (rc_uint_type) strtol (fn, NULL, 10);
	195	if (ncp == CP_UTF16 \|\| ! unicode_is_valid_codepage (ncp))
	196	fatal (_("invalid value specified for pragma code_page.\n"));
	197	wind_current_codepage = ncp;
	198	}
	199	free (fn);
	200	return;
	201	}
	202
4a594fce NC	203	line = strtol (s, &send, 0);
	204	if (send != '\0' && ! ISSPACE (send))
	205	return;
	206
	207	/* Subtract 1 because we are about to count the newline. */
	208	rc_lineno = line - 1;
	209
	210	s = send;
	211	while (ISSPACE (*s))
	212	++s;
	213
	214	if (*s != '"')
	215	return;
	216
	217	++s;
	218	send = strchr (s, '"');
	219	if (send == NULL)
	220	return;
	221
	222	fn = xmalloc (send - s + 1);
	223	strncpy (fn, s, send - s);
	224	fn[send - s] = '\0';
	225
	226	free (rc_filename);
	227	rc_filename = fn;
	228
	229	if (! initial_fn)
	230	{
	231	initial_fn = xmalloc (strlen (fn) + 1);
	232	strcpy (initial_fn, fn);
	233	}
	234
	235	/* Allow the initial file, regardless of name. Suppress all other
	236	files if they end in ".h" (this allows included ".rc"). /
	237	if (strcmp (initial_fn, fn) == 0
	238	\|\| strcmp (fn + strlen (fn) - 2, ".h") != 0)
	239	suppress_cpp_data = 0;
	240	else
	241	suppress_cpp_data = 1;
	242	}
	243
	244	/* Allocate a string of a given length. */
	245
	246	static char *
	247	get_string (int len)
	248	{
	249	struct alloc_string *as;
	250
	251	as = xmalloc (sizeof *as);
	252	as->s = xmalloc (len);
	253
	254	as->next = strings;
	255	strings = as;
	256
	257	return as->s;
	258	}
	259
	260	/* Handle a quoted string. The quotes are stripped. A pair of quotes
	261	in a string are turned into a single quote. Adjacent strings are
	262	merged separated by whitespace are merged, as in C. */
	263
	264	static char *
	265	handle_quotes (rc_uint_type *len)
	266	{
267	const char *input = rclex_tok;
268	char ret, s;
269	const char *t;
270	int ch;
271	int num_xdigits;
272
273	ret = get_string (strlen (input) + 1);
274
275	s = ret;
276	t = input;
277	if (*t == '"')
278	++t;
279	while (*t != '\0')
280	{
281	if (*t == '\\')
282	{
283	++t;
284	switch (*t)
285	{
286	case '\0':
287	rcparse_warning ("backslash at end of string");
288	break;
289
290	case '\"':
291	rcparse_warning ("use \"\" to put \" in a string");
292	*s++ = '"';
293	++t;
294	break;
295
296	case 'a':
297	s++ = ESCAPE_B; / Strange, but true... */
298	++t;
299	break;
300
301	case 'b':
302	*s++ = ESCAPE_B;
303	++t;
304	break;
305
306	case 'f':
307	*s++ = ESCAPE_F;
308	++t;
309	break;
310
311	case 'n':
312	*s++ = ESCAPE_N;
313	++t;
314	break;
315
316	case 'r':
317	*s++ = ESCAPE_R;
318	++t;
319	break;
320
321	case 't':
322	*s++ = ESCAPE_T;
323	++t;
324	break;
325
326	case 'v':
327	*s++ = ESCAPE_V;
328	++t;
329	break;
330
331	case '\\':
332	s++ = t++;
333	break;
334
335	case '0': case '1': case '2': case '3':
336	case '4': case '5': case '6': case '7':
337	ch = *t - '0';
338	++t;
339	if (t >= '0' && t <= '7')
340	{
341	ch = (ch << 3) \| (*t - '0');
342	++t;
343	if (t >= '0' && t <= '7')
344	{
345	ch = (ch << 3) \| (*t - '0');
346	++t;
347	}
348	}
349	*s++ = ch;
350	break;
351
352	case 'x': case 'X':
353	++t;
354	ch = 0;
355	/* We only handle single byte chars here. Make sure
356	we finish an escape sequence like "/xB0ABC" after
357	the first two digits. */
358	num_xdigits = 2;
359	while (num_xdigits--)
360	{
361	if (t >= '0' && t <= '9')
362	ch = (ch << 4) \| (*t - '0');
363	else if (t >= 'a' && t <= 'f')
364	ch = (ch << 4) \| (*t - 'a' + 10);
365	else if (t >= 'A' && t <= 'F')
366	ch = (ch << 4) \| (*t - 'A' + 10);
367	else
368	break;
369	++t;
370	}
371	*s++ = ch;
372	break;
373
374	default:
375	rcparse_warning ("unrecognized escape sequence");
376	*s++ = '\\';
377	s++ = t++;
378	break;
379	}
380	}
381	else if (*t != '"')
382	s++ = t++;
383	else if (t[1] == '\0')
384	break;
385	else if (t[1] == '"')
386	{
387	*s++ = '"';
388	t += 2;
389	}
390	else
391	{
392	rcparse_warning ("unexpected character after '\"'");
393	++t;
394	assert (ISSPACE (*t));
395	while (ISSPACE (*t))
396	{
397	if ((*t) == '\n')
398	++rc_lineno;
399	++t;
400	}
401	if (*t == '\0')
402	break;
403	assert (*t == '"');
404	++t;
405	}
406	}
407
408	*s = '\0';
409
410	*len = s - ret;
411
412	return ret;
413	}
414
415	/* Allocate a unicode string of a given length. */
416
417	static unichar *
418	get_unistring (int len)
419	{
420	return (unichar ) get_string (len sizeof (unichar));
421	}
422
423	/* Handle a quoted unicode string. The quotes are stripped. A pair of quotes
424	in a string are turned into a single quote. Adjacent strings are
425	merged separated by whitespace are merged, as in C. */
426
427	static unichar *
428	handle_uniquotes (rc_uint_type *len)
429	{
430	const char *input = rclex_tok;
431	unichar ret, s;
432	const char *t;
433	int ch;
434	int num_xdigits;
435
436	ret = get_unistring (strlen (input) + 1);
437
438	s = ret;
439	t = input;
440	if ((t == 'L' \|\| t == 'l') && t[1] == '"')
441	t += 2;
442	else if (*t == '"')
443	++t;
444	while (*t != '\0')
445	{
446	if (*t == '\\')
447	{
448	++t;
449	switch (*t)
450	{
451	case '\0':
452	rcparse_warning ("backslash at end of string");
453	break;
454
455	case '\"':
456	rcparse_warning ("use \"\" to put \" in a string");
457	break;
458
459	case 'a':
460	s++ = ESCAPE_B; / Strange, but true... */
461	++t;
462	break;
463
464	case 'b':
465	*s++ = ESCAPE_B;
466	++t;
467	break;
468
469	case 'f':
470	*s++ = ESCAPE_F;
471	++t;
472	break;
473
474	case 'n':
475	*s++ = ESCAPE_N;
476	++t;
477	break;
478
479	case 'r':
480	*s++ = ESCAPE_R;
481	++t;
482	break;
483
484	case 't':
485	*s++ = ESCAPE_T;
486	++t;
487	break;
488
489	case 'v':
490	*s++ = ESCAPE_V;
491	++t;
492	break;
493
494	case '\\':
495	s++ = (unichar) t++;
496	break;
497
498	case '0': case '1': case '2': case '3':
499	case '4': case '5': case '6': case '7':
500	ch = *t - '0';
501	++t;
502	if (t >= '0' && t <= '7')
503	{
504	ch = (ch << 3) \| (*t - '0');
505	++t;
506	if (t >= '0' && t <= '7')
507	{
508	ch = (ch << 3) \| (*t - '0');
509	++t;
510	}
511	}
512	*s++ = (unichar) ch;
513	break;
514
515	case 'x': case 'X':
516	++t;
517	ch = 0;
518	/* We only handle two byte chars here. Make sure
519	we finish an escape sequence like "/xB0ABC" after
520	the first two digits. */
521	num_xdigits = 4;
522	while (num_xdigits--)
523	{
524	if (t >= '0' && t <= '9')
525	ch = (ch << 4) \| (*t - '0');
526	else if (t >= 'a' && t <= 'f')
527	ch = (ch << 4) \| (*t - 'a' + 10);
528	else if (t >= 'A' && t <= 'F')
529	ch = (ch << 4) \| (*t - 'A' + 10);
530	else
531	break;
532	++t;
533	}
534	*s++ = (unichar) ch;
535	break;
536
537	default:
538	rcparse_warning ("unrecognized escape sequence");
539	*s++ = '\\';
540	s++ = (unichar) t++;
541	break;
542	}
543	}
544	else if (*t != '"')
545	s++ = (unichar) t++;
546	else if (t[1] == '\0')
547	break;
548	else if (t[1] == '"')
549	{
550	*s++ = '"';
551	t += 2;
552	}
553	else
554	{
555	++t;
556	assert (ISSPACE (*t));
557	while (ISSPACE (*t))
558	{
559	if ((*t) == '\n')
560	++rc_lineno;
561	++t;
562	}
563	if (*t == '\0')
564	break;
565	assert (*t == '"');
566	++t;
567	}
568	}
569
570	*s = '\0';
571
572	*len = s - ret;
573
574	return ret;
575	}
576
577	/* Discard all the strings we have allocated. The parser calls this
578	when it no longer needs them. */
579
580	void
581	rcparse_discard_strings (void)
582	{
583	struct alloc_string *as;
584
585	as = strings;
586	while (as != NULL)
587	{
588	struct alloc_string *n;
589
590	free (as->s);
591	n = as->next;
592	free (as);
593	as = n;
594	}
595
596	strings = NULL;
597	}
598
599	/* Enter rcdata mode. */
600	void
601	rcparse_rcdata (void)
602	{
603	rcdata_mode = 1;
604	}
605
606	/* Go back to normal mode from rcdata mode. */
607	void
608	rcparse_normal (void)
609	{
610	rcdata_mode = 0;
611	}
612
613	static void
614	rclex_tok_add_char (int ch)
615	{
616	if (! rclex_tok \|\| rclex_tok_max <= rclex_tok_pos)
617	{
618	char *h = xmalloc (rclex_tok_max + 9);
619
620	if (! h)
621	abort ();
622	if (rclex_tok)
623	{
624	memcpy (h, rclex_tok, rclex_tok_pos + 1);
625	free (rclex_tok);
626	}
627	else
628	rclex_tok_pos = 0;
629	rclex_tok_max += 8;
630	rclex_tok = h;
631	}
632	if (ch != -1)
633	rclex_tok[rclex_tok_pos++] = (char) ch;
634	rclex_tok[rclex_tok_pos] = 0;
635	}
636
637	static int
638	rclex_readch (void)
639	{
640	int r = -1;
641
642	if ((r = rclex_lastch) != -1)
643	rclex_lastch = -1;
644	else
645	{
646	char ch;
647	do
648	{
649	if (! cpp_pipe \|\| feof (cpp_pipe)
650	\|\| fread (&ch, 1, 1,cpp_pipe) != 1)
651	break;
652	r = ((int) ch) & 0xff;
653	}
654	while (r == 0 \|\| r == '\r');
655	}
656	rclex_tok_add_char (r);
657	return r;
658	}
659
660	static int
661	rclex_peekch (void)
662	{
663	int r;
664
665	if ((r = rclex_lastch) == -1)
666	{
667	if ((r = rclex_readch ()) != -1)
668	{
669	rclex_lastch = r;
670	if (rclex_tok_pos > 0)
671	rclex_tok[--rclex_tok_pos] = 0;
672	}
673	}
674	return r;
675	}
676
/* Consume the remainder of a quoted string; yylex calls this after the
   opening quote has been read.

   Scanning stops, without consuming the offending character, at end of
   input or at a newline.  A backslash consumes itself and the
   character after it, unless that character is a newline or end of
   input.  A doubled quote ("") is consumed as part of the string; a
   single quote is consumed and ends the scan.  */

static void
rclex_string (void)
{
  for (;;)
    {
      int c = rclex_peekch ();

      if (c == -1 || c == '\n')
        break;

      if (c == '\\')
        {
          rclex_readch ();
          c = rclex_peekch ();
          if (c == -1 || c == '\n')
            break;
          rclex_readch ();
          continue;
        }

      /* Ordinary character: consume it and carry on unless it is a
         quote.  */
      if (rclex_readch () != '"')
        continue;

      /* A quote not followed by another quote closes the string.  */
      if (rclex_peekch () != '"')
        break;
      rclex_readch ();
    }
}
702
703	static rc_uint_type
704	read_digit (int ch)
705	{
706	rc_uint_type base = 10;
707	rc_uint_type ret, val;
708	int warned = 0;
709
710	ret = 0;
711	if (ch == '0')
712	{
713	base = 8;
714	switch (rclex_peekch ())
715	{
716	case 'o': case 'O':
717	rclex_readch ();
718	base = 8;
719	break;
720
721	case 'x': case 'X':
722	rclex_readch ();
723	base = 16;
724	break;
725	}
726	}
727	else
728	ret = (rc_uint_type) (ch - '0');
729	while ((ch = rclex_peekch ()) != -1)
730	{
731	if (ISDIGIT (ch))
732	val = (rc_uint_type) (ch - '0');
733	else if (ch >= 'a' && ch <= 'f')
734	val = (rc_uint_type) ((ch - 'a') + 10);
735	else if (ch >= 'A' && ch <= 'F')
736	val = (rc_uint_type) ((ch - 'A') + 10);
737	else
738	break;
739	rclex_readch ();
740	if (! warned && val >= base)
741	{
742	warned = 1;
743	rcparse_warning ("digit exceeds base");
744	}
745	ret *= base;
746	ret += val;
747	}
748	return ret;
749	}
750
751	/* yyparser entry method. */
752
753	int
754	yylex (void)
755	{
756	char *s;
757	unichar *us;
758	rc_uint_type length;
759	int ch;
760
761	/* Make sure that rclex_tok is initialized. */
762	if (! rclex_tok)
763	rclex_tok_add_char (-1);
764
765	do
766	{
767	do
768	{
769	/* Clear token. */
770	rclex_tok_pos = 0;
771	rclex_tok[0] = 0;
772
773	if ((ch = rclex_readch ()) == -1)
774	return -1;
775	if (ch == '\n')
776	++rc_lineno;
777	}
778	while (ch <= 0x20);
779
780	switch (ch)
781	{
782	case '#':
783	while ((ch = rclex_peekch ()) != -1 && ch != '\n')
784	rclex_readch ();
785	cpp_line ();
786	ch = IGNORED_TOKEN;
787	break;
788
789	case '{':
790	ch = IGNORE_CPP (BEG);
791	break;
792
793	case '}':
794	ch = IGNORE_CPP (END);
795	break;
796
797	case '0': case '1': case '2': case '3': case '4':
798	case '5': case '6': case '7': case '8': case '9':
799	yylval.i.val = read_digit (ch);
800	yylval.i.dword = 0;
801	switch (rclex_peekch ())
802	{
803	case 'l': case 'L':
804	rclex_readch ();
805	yylval.i.dword = 1;
806	break;
807	}
808	ch = IGNORE_CPP (NUMBER);
809	break;
810	case '"':
811	rclex_string ();
812	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
813	if (ch == IGNORED_TOKEN)
814	break;
815	s = handle_quotes (&length);
816	if (! rcdata_mode)
817	yylval.s = s;
818	else
819	{
820	yylval.ss.length = length;
821	yylval.ss.s = s;
822	}
823	break;
824	case 'L': case 'l':
825	if (rclex_peekch () == '"')
826	{
827	rclex_readch ();
828	rclex_string ();
829	ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
830	if (ch == IGNORED_TOKEN)
831	break;
832	us = handle_uniquotes (&length);
833	if (! rcdata_mode)
834	yylval.uni = us;
835	else
836	{
837	yylval.suni.length = length;
838	yylval.suni.s = us;
839	}
840	break;
841	}
842	/* Fall through. */
843	default:
844	if (ISIDST (ch) \|\| ch=='$')
845	{
846	while ((ch = rclex_peekch ()) != -1 && (ISIDNUM (ch) \|\| ch == '$' \|\| ch == '.'))
847	rclex_readch ();
848	ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
849	if (ch == STRING)
850	{
851	s = get_string (strlen (rclex_tok) + 1);
852	strcpy (s, rclex_tok);
853	yylval.s = s;
854	}
855	else if (ch == BLOCK)
856	{
857	const char *hs = NULL;
858
859	switch (yylex ())
860	{
861	case STRING:
862	case QUOTEDSTRING:
863	hs = yylval.s;
864	break;
865	case SIZEDSTRING:
866	hs = yylval.s = yylval.ss.s;
867	break;
868	}
869	if (! hs)
870	{
871	rcparse_warning ("BLOCK expects a string as argument.");
872	ch = IGNORED_TOKEN;
873	}
874	else if (! strcmp (hs, "StringFileInfo"))
875	ch = BLOCKSTRINGFILEINFO;
876	else if (! strcmp (hs, "VarFileInfo"))
877	ch = BLOCKVARFILEINFO;
878	}
879	break;
880	}
881	ch = IGNORE_CPP (ch);
882	break;
883	}
884	}
885	while (ch == IGNORED_TOKEN);
886
887	return ch;
888	}