- FIXME: We do not recognize the full operand grammar defined in the MASM
- documentation. In particular, all the structure/union and
- high-level macro operands are missing.
-
- Uppercase words are terminals, lowercase words are non-terminals.
- Objects surrounded by double brackets '[[' ']]' are optional. Vertical
- bars '|' denote choices. Most grammar productions are implemented in
- functions called 'intel_<production>'.
-
- Initial production is 'expr'.
-
- addOp + | -
-
- alpha [a-zA-Z]
-
- binOp & | AND | \| | OR | ^ | XOR
-
- byteRegister AL | AH | BL | BH | CL | CH | DL | DH
-
- constant digits [[ radixOverride ]]
-
- dataType BYTE | WORD | DWORD | FWORD | QWORD | TBYTE | OWORD | XMMWORD
-
- digits decdigit
- | digits decdigit
- | digits hexdigit
-
- decdigit [0-9]
-
- e04 e04 addOp e05
- | e05
-
- e05 e05 binOp e06
- | e06
-
- e06 e06 mulOp e09
- | e09
-
- e09 OFFSET e10
- | SHORT e10
- | + e10
- | - e10
- | ~ e10
- | NOT e10
- | e09 PTR e10
- | e09 : e10
- | e10
-
- e10 e10 [ expr ]
- | e11
-
- e11 ( expr )
- | [ expr ]
- | constant
- | dataType
- | id
- | $
- | register
-
- => expr expr cmpOp e04
- | e04
-
- gpRegister AX | EAX | BX | EBX | CX | ECX | DX | EDX
- | BP | EBP | SP | ESP | DI | EDI | SI | ESI
-
- hexdigit a | b | c | d | e | f
- | A | B | C | D | E | F
-
- id alpha
- | id alpha
- | id decdigit
-
- mulOp * | / | % | MOD | << | SHL | >> | SHR
-
- quote " | '
-
- register specialRegister
- | gpRegister
- | byteRegister
-
- segmentRegister CS | DS | ES | FS | GS | SS
-
- specialRegister CR0 | CR2 | CR3 | CR4
- | DR0 | DR1 | DR2 | DR3 | DR6 | DR7
- | TR3 | TR4 | TR5 | TR6 | TR7
-
- We simplify the grammar in obvious places (e.g., register parsing is
- done by calling parse_register) and eliminate immediate left recursion
- to implement a recursive-descent parser.
-
- expr e04 expr'
-
- expr' cmpOp e04 expr'
- | Empty
-
- e04 e05 e04'
-
- e04' addOp e05 e04'
- | Empty
-
- e05 e06 e05'
-
- e05' binOp e06 e05'
- | Empty
-
- e06 e09 e06'
-
- e06' mulOp e09 e06'
- | Empty
-
- e09 OFFSET e10 e09'
- | SHORT e10'
- | + e10'
- | - e10'
- | ~ e10'
- | NOT e10'
- | e10 e09'
-
- e09' PTR e10 e09'
- | : e10 e09'
- | Empty
-
- e10 e11 e10'
-
- e10' [ expr ] e10'
- | Empty
-
- e11 ( expr )
- | [ expr ]
- | BYTE
- | WORD
- | DWORD
- | FWORD
- | QWORD
- | TBYTE
- | OWORD
- | XMMWORD
- | .
- | $
- | register
- | id
- | constant */
-
-/* Parsing structure for the intel syntax parser. Used to implement the
- semantic actions for the operand grammar. A single file-scope instance,
- intel_parser, holds the state; i386_intel_operand reinitializes the
- per-operand fields before each operand it parses. */
-struct intel_parser_s
- {
- char *op_string; /* The string being parsed. */
- int got_a_float; /* Whether the operand is a float (copied from the
- caller's flag for every operand). */
- int op_modifier; /* Operand modifier; tested as a bit mask, e.g.
- against (1 << T_OFFSET). */
- int is_mem; /* 1 if operand is memory reference. A negative value is
- also treated as a memory reference, but draws a warning
- unless warnings are quiet. */
- int in_offset; /* >=1 if parsing operand of offset. */
- int in_bracket; /* >=1 if parsing operand in brackets. */
- const reg_entry *reg; /* Last register reference found. */
- char *disp; /* Displacement string being built; allocated large enough
- to hold the whole operand text. */
- char *next_operand; /* Resume point when splitting operands; NULL when
- no further operand follows. */
- };
-
-static struct intel_parser_s intel_parser;
-
-/* Token structure for parsing intel syntax. One lexical token of the
- operand string, as consumed by the intel_<production> functions. */
-struct intel_token
- {
- int code; /* Token code: a T_xxx value, or the character itself for
- single-character operators such as '+' or '*'. */
- const reg_entry *reg; /* Register entry for register tokens. */
- char *str; /* String representation. */
- };
-
-static struct intel_token cur_token, prev_token; /* Both are reset to
- T_NIL / NULL before each operand is parsed; cur_token is the token
- the grammar functions examine, prev_token presumably the one before
- it (TODO confirm against intel_get_token). */
-
-/* Token codes for the intel parser. Since T_SHORT is already used
- by COFF, undefine it first to prevent a warning.
-
- T_NIL is the "no token" value: the token holders are reset to it
- before each operand, and an operand is only accepted when the
- current token is T_NIL again after parsing. The positive codes are
- small enough to serve as bit positions, as in (1 << T_OFFSET) when
- testing the operand modifier. Single-character operators do not get
- a code here; their character value is used directly. */
-#define T_NIL -1
-#define T_CONST 1
-#define T_REG 2
-#define T_BYTE 3
-#define T_WORD 4
-#define T_DWORD 5
-#define T_FWORD 6
-#define T_QWORD 7
-#define T_TBYTE 8
-#define T_XMMWORD 9
-#undef T_SHORT
-#define T_SHORT 10
-#define T_OFFSET 11
-#define T_PTR 12
-#define T_ID 13
-#define T_SHL 14 /* Emitted into the displacement string as "<<". */
-#define T_SHR 15 /* Emitted into the displacement string as ">>". */
-/* Prototypes for intel parser functions: the three token helpers
- first, then one function per grammar production, listed from the
- initial production (expr) down to the innermost one (e11). Each
- production function visible here returns 0 on failure and nonzero
- on success. */
-static int intel_match_token (int);
-static void intel_putback_token (void);
-static void intel_get_token (void);
-static int intel_expr (void);
-static int intel_e04 (void);
-static int intel_e05 (void);
-static int intel_e06 (void);
-static int intel_e09 (void);
-static int intel_e10 (void);
-static int intel_e11 (void);
-/* Parse OPERAND_STRING as intel-syntax operand text and record the
-   result in the instruction being assembled.  GOT_A_FLOAT is passed
-   on to the parser state for every operand.  When the parser reports
-   a resume point (next_operand) the remaining text is parsed as a
-   further operand.  Returns zero on failure and nonzero on success.  */
-static int
-i386_intel_operand (char *operand_string, int got_a_float)
-{
-  char *operand_copy = xstrdup (operand_string);
-  int ok;
-
-  intel_parser.op_string = operand_copy;
-  intel_parser.disp = (char *) xmalloc (strlen (operand_string) + 1);
-
-  while (1)
-    {
-      /* Begin each operand with empty token holders ...  */
-      prev_token.code = T_NIL;
-      prev_token.reg = NULL;
-      prev_token.str = NULL;
-      cur_token.code = T_NIL;
-      cur_token.reg = NULL;
-      cur_token.str = NULL;
-
-      /* ... and with pristine per-operand parser state.  */
-      intel_parser.next_operand = NULL;
-      intel_parser.disp[0] = '\0';
-      intel_parser.reg = NULL;
-      intel_parser.in_bracket = 0;
-      intel_parser.in_offset = 0;
-      intel_parser.is_mem = 0;
-      intel_parser.op_modifier = 0;
-      intel_parser.got_a_float = got_a_float;
-
-      /* Fetch the first token, then run the grammar from the initial
-         production.  */
-      intel_get_token ();
-      ok = intel_expr ();
-      if (!ok)
-        break;
-
-      if (cur_token.code != T_NIL)
-        {
-          /* The grammar stopped before the operand text was used up.  */
-          as_bad (_("invalid operand for '%s' ('%s' unexpected)"),
-                  current_templates->start->name, cur_token.str);
-          ok = 0;
-        }
-      else if (intel_parser.is_mem != 0)
-        {
-          /* Memory reference: i386_displacement fills in the rest of
-             the operand fields.  A second memory operand is accepted
-             only for string instructions, a third one never.  */
-          int too_many
-            = (i.mem_operands == 2
-               || (i.mem_operands == 1
-                   && !current_templates->start->opcode_modifier.isstring));
-
-          if (too_many)
-            {
-              as_bad (_("too many memory references for '%s'"),
-                      current_templates->start->name);
-              ok = 0;
-            }
-          else
-            {
-              char *disp_text = intel_parser.disp;
-
-              i.mem_operands++;
-
-              /* See the comments in intel_bracket_expr.  */
-              if (intel_parser.is_mem < 0 && !quiet_warnings)
-                as_warn (_("Treating `%s' as memory reference"),
-                         operand_string);
-
-              /* Add the displacement expression, if there is one.  */
-              if (disp_text[0] != '\0')
-                {
-                  char *disp_end = disp_text + strlen (disp_text);
-
-                  ok = i386_displacement (disp_text, disp_end);
-                }
-
-              if (ok)
-                {
-                  /* Swap base and index in 16-bit memory operands
-                     like [si+bx].  i386_index_check is shared with
-                     AT&T mode, so the swap has to happen here.  */
-                  if (i.base_reg != NULL
-                      && i.index_reg != NULL
-                      && i.base_reg->reg_type.bitfield.reg16
-                      && i.index_reg->reg_type.bitfield.reg16
-                      && i.base_reg->reg_num >= 6
-                      && i.index_reg->reg_num < 6)
-                    {
-                      const reg_entry *old_base = i.base_reg;
-
-                      i.base_reg = i.index_reg;
-                      i.index_reg = old_base;
-                    }
-                  ok = i386_index_check (operand_string);
-                }
-            }
-        }
-      else if (intel_parser.reg == NULL
-               || (intel_parser.op_modifier & (1 << T_OFFSET)) != 0)
-        {
-          /* Constant and OFFSET expressions go to i386_immediate.  */
-          ok = i386_immediate (intel_parser.disp);
-        }
-
-      /* Finished unless the parser left a resume point behind.  */
-      if (intel_parser.next_operand == NULL)
-        break;
-
-      /* A further operand is pending; fail if there is no slot left.  */
-      if (this_operand >= MAX_OPERANDS - 1)
-        ok = 0;
-      if (!ok)
-        break;
-
-      intel_parser.op_string = intel_parser.next_operand;
-      this_operand = i.operands++;
-    }
-
-  free (operand_copy);
-  free (intel_parser.disp);
-
-  return ok;
-}
-
-/* Number of address registers (base and/or index: 0, 1 or 2) recorded
-   so far for the operand being parsed.  */
-#define NUM_ADDRESS_REGS ((i.base_reg != NULL) + (i.index_reg != NULL))
-
-/* Initial production of the operand grammar:
-
-   expr   e04 expr'
-
-   expr'  cmpOp e04 expr'
-          | Empty
-
-   Returns whatever intel_e04 returns.  */
-static int
-intel_expr (void)
-{
-  /* XXX Implement the comparison operators.  Until then an expr is
-     just an e04.  */
-  int ok = intel_e04 ();
-
-  return ok;
-}
-
-/* Additive level of the grammar:
-
-   e04    e05 e04'
-
-   e04'   addOp e05 e04'
-          | Empty
-
-   Each '+' or '-' is appended to the displacement string.  Returns 0
-   as soon as an e05 fails to parse, 1 once no further addOp follows.  */
-static int
-intel_e04 (void)
-{
-  /* Address-register count sampled when a '-' was seen, or negative
-     if the operator before the current term was not '-'.  */
-  int regs_before_minus = -1;
-
-  while (intel_e05 ())
-    {
-      /* The term just parsed followed a '-' and brought in an address
-         register: poison the base register so the operand is rejected
-         later (al is invalid as base).  */
-      if (regs_before_minus >= 0 && NUM_ADDRESS_REGS > regs_before_minus)
-        i.base_reg = &i386_regtab[REGNAM_AL];
-
-      switch (cur_token.code)
-        {
-        case '+':
-          regs_before_minus = -1;
-          break;
-
-        case '-':
-          regs_before_minus = NUM_ADDRESS_REGS;
-          break;
-
-        default:
-          /* No more additive operators.  */
-          return 1;
-        }
-
-      strcat (intel_parser.disp, cur_token.str);
-      intel_match_token (cur_token.code);
-    }
-
-  return 0;
-}
-
-/* Bitwise level of the grammar:
-
-   e05    e06 e05'
-
-   e05'   binOp e06 e05'
-          | Empty
-
-   Only the single-character operators '&', '|' and '^' are recognized
-   here; each one is appended to the displacement string.  Returns 0
-   as soon as an e06 fails to parse, 1 otherwise.  */
-static int
-intel_e05 (void)
-{
-  /* Address registers already recorded when this production began.  */
-  const int regs_on_entry = NUM_ADDRESS_REGS;
-  /* Nonzero once at least one binOp has been consumed.  */
-  int saw_operator = 0;
-
-  while (intel_e06 ())
-    {
-      char op[2] = { 0, 0 };
-
-      if (cur_token.code != '&'
-          && cur_token.code != '|'
-          && cur_token.code != '^')
-        {
-          /* End of the chain.  If an operator was applied and the
-             chain brought in an address register, poison the base
-             register (cl is invalid as base).  */
-          if (saw_operator && NUM_ADDRESS_REGS > regs_on_entry)
-            i.base_reg = &i386_regtab[REGNAM_AL + 1];
-          return 1;
-        }
-
-      op[0] = cur_token.code;
-      strcat (intel_parser.disp, op);
-      intel_match_token (cur_token.code);
-      saw_operator = 1;
-    }
-
-  return 0;
-}
-
-/* Multiplicative level of the grammar:
-
-   e06    e09 e06'
-
-   e06'   mulOp e09 e06'
-          | Empty
-
-   Recognizes '*', '/', '%' and the T_SHL / T_SHR tokens; the shifts
-   are written to the displacement string as "<<" and ">>".  Returns 0
-   as soon as an e09 fails to parse, 1 otherwise.  */
-static int
-intel_e06 (void)
-{
-  /* Address registers already recorded when this production began.  */
-  const int regs_on_entry = NUM_ADDRESS_REGS;
-  /* Nonzero once at least one mulOp has been consumed.  */
-  int saw_operator = 0;
-
-  while (intel_e09 ())
-    {
-      char op[2];
-      const char *op_text;
-
-      switch (cur_token.code)
-        {
-        case '*':
-        case '/':
-        case '%':
-          op[0] = cur_token.code;
-          op[1] = '\0';
-          op_text = op;
-          break;
-
-        case T_SHL:
-          op_text = "<<";
-          break;
-
-        case T_SHR:
-          op_text = ">>";
-          break;
-
-        default:
-          /* End of the chain.  If an operator was applied and the
-             chain brought in an address register, poison the base
-             register (dl is invalid as base).  */
-          if (saw_operator && NUM_ADDRESS_REGS > regs_on_entry)
-            i.base_reg = &i386_regtab[REGNAM_AL + 2];
-          return 1;
-        }
-
-      strcat (intel_parser.disp, op_text);
-      intel_match_token (cur_token.code);
-      saw_operator = 1;
-    }
-
-  return 0;
-}
-
-/* e09 OFFSET e09
- | SHORT e09
- | + e09
- | - e09
- | ~ e09
- | NOT e09
- | e10 e09'
-
- e09' PTR e10 e09'
- | : e10 e09'
- | Empty */
-static int
-intel_e09 (void)