Commit | Line | Data |
---|---|---|
f6e6b40f BE |
1 | /* Assembler interface for targets using CGEN. -*- C -*- |
2 | CGEN: Cpu tools GENerator | |
3 | ||
4 | THIS FILE IS MACHINE GENERATED WITH CGEN. | |
5 | - the resultant file is machine generated, cgen-asm.in isn't | |
6 | ||
060d22b0 | 7 | Copyright 1996, 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. |
f6e6b40f BE |
8 | |
9 | This file is part of the GNU Binutils and GDB, the GNU debugger. | |
10 | ||
11 | This program is free software; you can redistribute it and/or modify | |
12 | it under the terms of the GNU General Public License as published by | |
13 | the Free Software Foundation; either version 2, or (at your option) | |
14 | any later version. | |
15 | ||
16 | This program is distributed in the hope that it will be useful, | |
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | GNU General Public License for more details. | |
20 | ||
21 | You should have received a copy of the GNU General Public License | |
22 | along with this program; if not, write to the Free Software Foundation, Inc., | |
23 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
24 | ||
25 | /* ??? Eventually more and more of this stuff can go to cpu-independent files. | |
26 | Keep that in mind. */ | |
27 | ||
28 | #include "sysdep.h" | |
29 | #include <ctype.h> | |
30 | #include <stdio.h> | |
31 | #include "ansidecl.h" | |
32 | #include "bfd.h" | |
33 | #include "symcat.h" | |
34 | #include "@prefix@-desc.h" | |
35 | #include "@prefix@-opc.h" | |
36 | #include "opintl.h" | |
23969580 | 37 | #include "xregex.h" |
0e2ee3ca | 38 | #include "libiberty.h" |
f6e6b40f BE |
39 | |
/* Two-argument minimum and maximum.  Any earlier definition of either
   name is discarded first.  Each macro evaluates one of its arguments
   twice, so arguments must be free of side effects.  */
#undef min
#undef max
#define min(x, y) ((x) < (y) ? (x) : (y))
#define max(x, y) ((x) > (y) ? (x) : (y))
44 | ||
45 | static const char * parse_insn_normal | |
46 | PARAMS ((CGEN_CPU_DESC, const CGEN_INSN *, const char **, CGEN_FIELDS *)); | |
47 | \f | |
48 | /* -- assembler routines inserted here */ | |
23969580 JJ |
49 | \f |
50 | ||
51 | /* | |
52 | Regex construction routine. | |
53 | ||
54 | This translates an opcode syntax string into a regex string, | |
55 | by replacing any non-character syntax element (such as an | |
56 | opcode) with the pattern '.*' | |
57 | ||
58 | It then compiles the regex and stores it in the opcode, for | |
59 | later use by @arch@_cgen_assemble_insn | |
60 | ||
0e2ee3ca | 61 | Returns NULL for success, an error message for failure. */ |
23969580 JJ |
62 | |
63 | char * | |
64 | @arch@_cgen_build_insn_regex (insn) | |
65 | CGEN_INSN *insn; | |
66 | { | |
0e2ee3ca | 67 | CGEN_OPCODE *opc = (CGEN_OPCODE *) CGEN_INSN_OPCODE (insn); |
23969580 JJ |
68 | const char *mnem = CGEN_INSN_MNEMONIC (insn); |
69 | int mnem_len; | |
70 | char rxbuf[CGEN_MAX_RX_ELEMENTS]; | |
71 | char *rx = rxbuf; | |
72 | const CGEN_SYNTAX_CHAR_TYPE *syn; | |
73 | int reg_err; | |
74 | ||
75 | syn = CGEN_SYNTAX_STRING (CGEN_OPCODE_SYNTAX (opc)); | |
76 | ||
77 | /* Mnemonics come first in the syntax string */ | |
78 | if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string"; | |
79 | ++syn; | |
80 | ||
81 | /* copy the literal mnemonic out of the insn */ | |
82 | memset (rx, 0, CGEN_MAX_RX_ELEMENTS); | |
83 | mnem_len = strlen(mnem); | |
84 | memcpy (rx, mnem, mnem_len); | |
85 | rx += mnem_len; | |
86 | ||
87 | /* copy any remaining literals from the syntax string into the rx */ | |
88 | for(; * syn != 0 && rx < rxbuf + (CGEN_MAX_RX_ELEMENTS - 9); ++syn, ++rx) | |
89 | { | |
90 | if (CGEN_SYNTAX_CHAR_P (* syn)) | |
91 | { | |
92 | char tmp = CGEN_SYNTAX_CHAR (* syn); | |
93 | switch (tmp) | |
94 | { | |
95 | /* escape any regex metacharacters in the syntax */ | |
96 | case '.': case '[': case '\\': | |
97 | case '*': case '^': case '$': | |
98 | ||
99 | #ifdef CGEN_ESCAPE_EXTENDED_REGEX | |
100 | case '?': case '{': case '}': | |
101 | case '(': case ')': case '*': | |
102 | case '|': case '+': case ']': | |
103 | #endif | |
104 | ||
105 | * rx++ = '\\'; | |
106 | break; | |
107 | } | |
108 | /* insert syntax char into rx */ | |
109 | * rx = tmp; | |
110 | } | |
111 | else | |
112 | { | |
113 | /* replace non-syntax fields with globs */ | |
114 | * rx = '.'; | |
115 | * ++rx = '*'; | |
116 | } | |
117 | } | |
118 | ||
119 | /* trailing whitespace ok */ | |
120 | * rx++ = '['; | |
121 | * rx++ = ' '; | |
122 | * rx++ = '\t'; | |
123 | * rx++ = ']'; | |
124 | * rx++ = '*'; | |
125 | ||
126 | /* but anchor it after that */ | |
127 | * rx++ = '$'; | |
128 | * rx = '\0'; | |
129 | ||
130 | CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t)); | |
131 | reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB|REG_ICASE); | |
132 | ||
133 | if (reg_err == 0) | |
134 | return NULL; | |
135 | else | |
136 | { | |
137 | static char msg[80]; | |
138 | regerror (reg_err, (regex_t *) CGEN_INSN_RX (insn), msg, 80); | |
139 | regfree ((regex_t *) CGEN_INSN_RX (insn)); | |
140 | free (CGEN_INSN_RX (insn)); | |
141 | (CGEN_INSN_RX (insn)) = NULL; | |
142 | return msg; | |
143 | } | |
144 | } | |
145 | ||
f6e6b40f BE |
146 | \f |
147 | /* Default insn parser. | |
148 | ||
149 | The syntax string is scanned and operands are parsed and stored in FIELDS. | |
150 | Relocs are queued as we go via other callbacks. | |
151 | ||
152 | ??? Note that this is currently an all-or-nothing parser. If we fail to | |
153 | parse the instruction, we return 0 and the caller will start over from | |
154 | the beginning. Backtracking will be necessary in parsing subexpressions, | |
155 | but that can be handled there. Not handling backtracking here may get | |
156 | expensive in the case of the m68k. Deal with later. | |
157 | ||
158 | Returns NULL for success, an error message for failure. | |
159 | */ | |
160 | ||
161 | static const char * | |
162 | parse_insn_normal (cd, insn, strp, fields) | |
163 | CGEN_CPU_DESC cd; | |
164 | const CGEN_INSN *insn; | |
165 | const char **strp; | |
166 | CGEN_FIELDS *fields; | |
167 | { | |
168 | /* ??? Runtime added insns not handled yet. */ | |
169 | const CGEN_SYNTAX *syntax = CGEN_INSN_SYNTAX (insn); | |
170 | const char *str = *strp; | |
171 | const char *errmsg; | |
172 | const char *p; | |
4a9f416d | 173 | const CGEN_SYNTAX_CHAR_TYPE * syn; |
f6e6b40f BE |
174 | #ifdef CGEN_MNEMONIC_OPERANDS |
175 | /* FIXME: wip */ | |
176 | int past_opcode_p; | |
177 | #endif | |
178 | ||
179 | /* For now we assume the mnemonic is first (there are no leading operands). | |
180 | We can parse it without needing to set up operand parsing. | |
181 | GAS's input scrubber will ensure mnemonics are lowercase, but we may | |
182 | not be called from GAS. */ | |
183 | p = CGEN_INSN_MNEMONIC (insn); | |
184 | while (*p && tolower (*p) == tolower (*str)) | |
185 | ++p, ++str; | |
186 | ||
187 | if (* p) | |
188 | return _("unrecognized instruction"); | |
189 | ||
190 | #ifndef CGEN_MNEMONIC_OPERANDS | |
191 | if (* str && !isspace (* str)) | |
192 | return _("unrecognized instruction"); | |
193 | #endif | |
194 | ||
195 | CGEN_INIT_PARSE (cd); | |
196 | cgen_init_parse_operand (cd); | |
197 | #ifdef CGEN_MNEMONIC_OPERANDS | |
198 | past_opcode_p = 0; | |
199 | #endif | |
200 | ||
201 | /* We don't check for (*str != '\0') here because we want to parse | |
202 | any trailing fake arguments in the syntax string. */ | |
203 | syn = CGEN_SYNTAX_STRING (syntax); | |
204 | ||
205 | /* Mnemonics come first for now, ensure valid string. */ | |
206 | if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) | |
207 | abort (); | |
208 | ||
209 | ++syn; | |
210 | ||
211 | while (* syn != 0) | |
212 | { | |
213 | /* Non operand chars must match exactly. */ | |
214 | if (CGEN_SYNTAX_CHAR_P (* syn)) | |
215 | { | |
216 | /* FIXME: While we allow for non-GAS callers above, we assume the | |
217 | first char after the mnemonic part is a space. */ | |
218 | /* FIXME: We also take inappropriate advantage of the fact that | |
219 | GAS's input scrubber will remove extraneous blanks. */ | |
220 | if (tolower (*str) == tolower (CGEN_SYNTAX_CHAR (* syn))) | |
221 | { | |
222 | #ifdef CGEN_MNEMONIC_OPERANDS | |
4a9f416d | 223 | if (CGEN_SYNTAX_CHAR(* syn) == ' ') |
f6e6b40f BE |
224 | past_opcode_p = 1; |
225 | #endif | |
226 | ++ syn; | |
227 | ++ str; | |
228 | } | |
149fe25e | 229 | else if (*str) |
f6e6b40f BE |
230 | { |
231 | /* Syntax char didn't match. Can't be this insn. */ | |
232 | static char msg [80]; | |
233 | /* xgettext:c-format */ | |
234 | sprintf (msg, _("syntax error (expected char `%c', found `%c')"), | |
4a9f416d | 235 | CGEN_SYNTAX_CHAR(*syn), *str); |
f6e6b40f BE |
236 | return msg; |
237 | } | |
149fe25e FCE |
238 | else |
239 | { | |
240 | /* Ran out of input. */ | |
241 | static char msg [80]; | |
242 | /* xgettext:c-format */ | |
243 | sprintf (msg, _("syntax error (expected char `%c', found end of instruction)"), | |
4a9f416d | 244 | CGEN_SYNTAX_CHAR(*syn)); |
149fe25e FCE |
245 | return msg; |
246 | } | |
f6e6b40f BE |
247 | continue; |
248 | } | |
249 | ||
250 | /* We have an operand of some sort. */ | |
251 | errmsg = @arch@_cgen_parse_operand (cd, CGEN_SYNTAX_FIELD (*syn), | |
252 | &str, fields); | |
253 | if (errmsg) | |
254 | return errmsg; | |
255 | ||
256 | /* Done with this operand, continue with next one. */ | |
257 | ++ syn; | |
258 | } | |
259 | ||
260 | /* If we're at the end of the syntax string, we're done. */ | |
4a9f416d | 261 | if (* syn == 0) |
f6e6b40f BE |
262 | { |
263 | /* FIXME: For the moment we assume a valid `str' can only contain | |
264 | blanks now. IE: We needn't try again with a longer version of | |
265 | the insn and it is assumed that longer versions of insns appear | |
266 | before shorter ones (eg: lsr r2,r3,1 vs lsr r2,r3). */ | |
267 | while (isspace (* str)) | |
268 | ++ str; | |
269 | ||
270 | if (* str != '\0') | |
271 | return _("junk at end of line"); /* FIXME: would like to include `str' */ | |
272 | ||
273 | return NULL; | |
274 | } | |
275 | ||
276 | /* We couldn't parse it. */ | |
277 | return _("unrecognized instruction"); | |
278 | } | |
279 | \f | |
280 | /* Main entry point. | |
281 | This routine is called for each instruction to be assembled. | |
282 | STR points to the insn to be assembled. | |
283 | We assume all necessary tables have been initialized. | |
284 | The assembled instruction, less any fixups, is stored in BUF. | |
285 | Remember that if CGEN_INT_INSN_P then BUF is an int and thus the value | |
286 | still needs to be converted to target byte order, otherwise BUF is an array | |
287 | of bytes in target byte order. | |
288 | The result is a pointer to the insn's entry in the opcode table, | |
289 | or NULL if an error occured (an error message will have already been | |
290 | printed). | |
291 | ||
292 | Note that when processing (non-alias) macro-insns, | |
293 | this function recurses. | |
294 | ||
295 | ??? It's possible to make this cpu-independent. | |
296 | One would have to deal with a few minor things. | |
297 | At this point in time doing so would be more of a curiosity than useful | |
298 | [for example this file isn't _that_ big], but keeping the possibility in | |
299 | mind helps keep the design clean. */ | |
300 | ||
301 | const CGEN_INSN * | |
302 | @arch@_cgen_assemble_insn (cd, str, fields, buf, errmsg) | |
303 | CGEN_CPU_DESC cd; | |
304 | const char *str; | |
305 | CGEN_FIELDS *fields; | |
306 | CGEN_INSN_BYTES_PTR buf; | |
307 | char **errmsg; | |
308 | { | |
309 | const char *start; | |
310 | CGEN_INSN_LIST *ilist; | |
606d55bc FCE |
311 | const char *parse_errmsg = NULL; |
312 | const char *insert_errmsg = NULL; | |
23969580 | 313 | int recognized_mnemonic = 0; |
f6e6b40f BE |
314 | |
315 | /* Skip leading white space. */ | |
316 | while (isspace (* str)) | |
317 | ++ str; | |
318 | ||
319 | /* The instructions are stored in hashed lists. | |
320 | Get the first in the list. */ | |
321 | ilist = CGEN_ASM_LOOKUP_INSN (cd, str); | |
322 | ||
323 | /* Keep looking until we find a match. */ | |
324 | ||
325 | start = str; | |
326 | for ( ; ilist != NULL ; ilist = CGEN_ASM_NEXT_INSN (ilist)) | |
327 | { | |
328 | const CGEN_INSN *insn = ilist->insn; | |
23969580 | 329 | recognized_mnemonic = 1; |
f6e6b40f BE |
330 | |
331 | #ifdef CGEN_VALIDATE_INSN_SUPPORTED | |
332 | /* not usually needed as unsupported opcodes shouldn't be in the hash lists */ | |
333 | /* Is this insn supported by the selected cpu? */ | |
334 | if (! @arch@_cgen_insn_supported (cd, insn)) | |
335 | continue; | |
336 | #endif | |
337 | ||
338 | /* If the RELAX attribute is set, this is an insn that shouldn't be | |
339 | chosen immediately. Instead, it is used during assembler/linker | |
340 | relaxation if possible. */ | |
341 | if (CGEN_INSN_ATTR_VALUE (insn, CGEN_INSN_RELAX) != 0) | |
342 | continue; | |
343 | ||
344 | str = start; | |
345 | ||
23969580 JJ |
346 | /* skip this insn if str doesn't look right lexically */ |
347 | if (CGEN_INSN_RX (insn) != NULL && | |
348 | regexec ((regex_t *) CGEN_INSN_RX (insn), str, 0, NULL, 0) == REG_NOMATCH) | |
349 | continue; | |
350 | ||
f6e6b40f BE |
351 | /* Allow parse/insert handlers to obtain length of insn. */ |
352 | CGEN_FIELDS_BITSIZE (fields) = CGEN_INSN_BITSIZE (insn); | |
353 | ||
606d55bc FCE |
354 | parse_errmsg = CGEN_PARSE_FN (cd, insn) (cd, insn, & str, fields); |
355 | if (parse_errmsg != NULL) | |
f6e6b40f BE |
356 | continue; |
357 | ||
358 | /* ??? 0 is passed for `pc' */ | |
606d55bc FCE |
359 | insert_errmsg = CGEN_INSERT_FN (cd, insn) (cd, insn, fields, buf, |
360 | (bfd_vma) 0); | |
361 | if (insert_errmsg != NULL) | |
f6e6b40f BE |
362 | continue; |
363 | ||
364 | /* It is up to the caller to actually output the insn and any | |
365 | queued relocs. */ | |
366 | return insn; | |
367 | } | |
368 | ||
f6e6b40f BE |
369 | { |
370 | static char errbuf[150]; | |
fca2040b | 371 | #ifdef CGEN_VERBOSE_ASSEMBLER_ERRORS |
606d55bc | 372 | const char *tmp_errmsg; |
f6e6b40f | 373 | |
606d55bc FCE |
374 | /* If requesting verbose error messages, use insert_errmsg. |
375 | Failing that, use parse_errmsg */ | |
376 | tmp_errmsg = (insert_errmsg ? insert_errmsg : | |
377 | parse_errmsg ? parse_errmsg : | |
23969580 | 378 | recognized_mnemonic ? _("unrecognized form of instruction") : |
606d55bc FCE |
379 | _("unrecognized instruction")); |
380 | ||
f6e6b40f BE |
381 | if (strlen (start) > 50) |
382 | /* xgettext:c-format */ | |
383 | sprintf (errbuf, "%s `%.50s...'", tmp_errmsg, start); | |
384 | else | |
385 | /* xgettext:c-format */ | |
386 | sprintf (errbuf, "%s `%.50s'", tmp_errmsg, start); | |
387 | #else | |
388 | if (strlen (start) > 50) | |
389 | /* xgettext:c-format */ | |
390 | sprintf (errbuf, _("bad instruction `%.50s...'"), start); | |
391 | else | |
392 | /* xgettext:c-format */ | |
393 | sprintf (errbuf, _("bad instruction `%.50s'"), start); | |
394 | #endif | |
395 | ||
396 | *errmsg = errbuf; | |
397 | return NULL; | |
398 | } | |
399 | } | |
400 | \f | |
#if 0 /* This calls back to GAS which we can't do without care.  */

/* Record each member of OPVALS in the assembler's symbol table.
   This lets GAS parse registers for us.
   ??? Interesting idea but not currently used.
   FIXME: Not currently used.  */

void
@arch@_cgen_asm_hash_keywords (cd, opvals)
     CGEN_CPU_DESC cd;
     CGEN_KEYWORD *opvals;
{
  CGEN_KEYWORD_SEARCH iter = cgen_keyword_search_init (opvals, NULL);
  const CGEN_KEYWORD_ENTRY *entry;

  /* Walk every keyword entry the search yields and register its
     name/value pair.  */
  for (entry = cgen_keyword_search_next (& iter);
       entry != NULL;
       entry = cgen_keyword_search_next (& iter))
    {
#if 0 /* Unnecessary, should be done in the search routine.  */
      if (! @arch@_cgen_opval_supported (entry))
	continue;
#endif
      cgen_asm_record_register (cd, entry->name, entry->value);
    }
}

#endif /* 0 */