Fixed a couple of crashes.
[wine.git] / tools / wmc / mcy.y
blobf8bf78c88e50914868ea56db3fe7c45976222bba
1 /*
2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * NOTES:
22 * The basic grammar of the file is yet another example of, humpf,
23 * design. There is a mix of context-insensitive and -sensitive
24 * stuff, which makes it rather complicated.
25 * The header definitions are all context-insensitive because they have
26 * delimited arguments, whereas the message headers are (semi-) context-
27 * sensitive and the messages themselves are, well, RFC82[12] delimited.
28 * This mixture seems to originate from the time that ms and ibm were
29 * good friends and developing os/2 according to the "compatibility"
30 * switch and reading some comments here and there.
32 * I'll ignore most of the complications and concentrate on the concept
33 * which allows me to use yacc. Basically, everything is context-
34 * insensitive now, with the exception of the message-text itself and
35 * the preceding language declaration.
41 #include "config.h"
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <assert.h>
47 #include "utils.h"
48 #include "wmc.h"
49 #include "lang.h"
/* Diagnostic texts shared by the error alternatives of the grammar */
static const char err_syntax[]	= "Syntax error";
static const char err_number[]	= "Number expected";
static const char err_ident[]	= "Identifier expected";
static const char err_assign[]	= "'=' expected";
static const char err_popen[]	= "'(' expected";
static const char err_pclose[]	= "')' expected";
static const char err_colon[]	= "':' expected";
static const char err_msg[]	= "Message expected";
60 /* Scanner switches */
61 int want_nl = 0; /* Request next newlinw */
62 int want_line = 0; /* Request next complete line */
63 int want_file = 0; /* Request next ident as filename */
65 node_t *nodehead = NULL; /* The list of all parsed elements */
66 static node_t *nodetail = NULL;
67 lan_blk_t *lanblockhead; /* List of parsed elements transposed */
69 static int base = 16; /* Current printout base to use (8, 10 or 16) */
70 static WCHAR *cast = NULL; /* Current typecast to use */
72 static int last_id = 0; /* The last message ID parsed */
73 static int last_sev = 0; /* Last severity code parsed */
74 static int last_fac = 0; /* Last facility code parsed */
75 static WCHAR *last_sym = NULL;/* Last alias symbol parsed */
76 static int have_sev; /* Set if severity parsed for current message */
77 static int have_fac; /* Set if facility parsed for current message */
78 static int have_sym; /* Set is symbol parsed for current message */
80 static cp_xlat_t *cpxlattab = NULL; /* Codepage translation table */
81 static int ncpxlattab = 0;
83 /* Prototypes */
84 static WCHAR *merge(WCHAR *s1, WCHAR *s2);
85 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg);
86 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg);
87 static msg_t *complete_msg(msg_t *msg, int id);
88 static void add_node(node_e type, void *p);
89 static void do_add_token(tok_e type, token_t *tok, const char *code);
90 static void test_id(int id);
91 static int check_languages(node_t *head);
92 static lan_blk_t *block_messages(node_t *head);
93 static void add_cpxlat(int lan, int cpin, int cpout);
94 static cp_xlat_t *find_cpxlat(int lan);
/* Semantic value types carried by tokens and non-terminals */
%union {
	WCHAR		*str;	/* tIDENT, tLINE, tFILE, tCOMMENT, alias, lines */
	unsigned	num;	/* tNUMBER, optcp, id, msgid, clan */
	token_t		*tok;	/* tTOKEN, token */
	lanmsg_t	*lmp;	/* body */
	msg_t		*msg;	/* bodies, msg */
	lan_cp_t	lcp;	/* lang */
}

/* Tokens without a semantic value (declaration order kept as-is) */
%token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
%token tTYPEDEF tNL tSYMNAME tMSGEND
%token tSEVERITY tFACILITY tLANGUAGE tMSGID

/* Tokens with a semantic value */
%token <str> tIDENT tLINE tFILE tCOMMENT
%token <num> tNUMBER
%token <tok> tTOKEN

/* Non-terminals with a semantic value */
%type <str> alias lines
%type <num> optcp id msgid clan
%type <tok> token
%type <lmp> body
%type <msg> bodies msg
%type <lcp> lang
%%

/*
 * Start symbol. Once all items are parsed, check_languages() returns
 * the number of messages seen; zero messages is reported as an error.
 * The parsed list is then transposed into lanblockhead.
 */
file	: items	{
		int nmessages = check_languages(nodehead);
		if(nmessages == 0)
			xyyerror("No messages defined");
		lanblockhead = block_messages(nodehead);
	}
	;
/* One or more declarations */
items	: decl
	| items decl
	;

/*
 * A declaration is a global setting, a complete message definition
 * or a comment. Messages and comments are appended to the node list;
 * globals only update parser state.
 */
decl	: global
	| msg		{ add_node(nd_msg, $1); }
	| tCOMMENT	{ add_node(nd_comment, $1); }
	| error		{ xyyerror(err_syntax); /* `Catch all' error */ }
	;
/*
 * Global (header) definitions: the four name-mapping tables, the
 * typecast to emit and the number base for printed IDs.
 * Each construct has error alternatives that report the first
 * element found missing.
 */
global	: tSEVNAMES '=' '(' smaps ')'
	| tSEVNAMES '=' '(' smaps error	{ xyyerror(err_pclose); }
	| tSEVNAMES '=' error		{ xyyerror(err_popen); }
	| tSEVNAMES error		{ xyyerror(err_assign); }
	| tFACNAMES '=' '(' fmaps ')'
	| tFACNAMES '=' '(' fmaps error	{ xyyerror(err_pclose); }
	| tFACNAMES '=' error		{ xyyerror(err_popen); }
	| tFACNAMES error		{ xyyerror(err_assign); }
	| tLANNAMES '=' '(' lmaps ')'
	| tLANNAMES '=' '(' lmaps error	{ xyyerror(err_pclose); }
	| tLANNAMES '=' error		{ xyyerror(err_popen); }
	| tLANNAMES error		{ xyyerror(err_assign); }
	| tCODEPAGE '=' '(' cmaps ')'
	| tCODEPAGE '=' '(' cmaps error	{ xyyerror(err_pclose); }
	| tCODEPAGE '=' error		{ xyyerror(err_popen); }
	| tCODEPAGE error		{ xyyerror(err_assign); }
	| tTYPEDEF '=' tIDENT		{ cast = $3; }
	/* An identifier is what is expected after the '=' here */
	| tTYPEDEF '=' error		{ xyyerror(err_ident); }
	| tTYPEDEF error		{ xyyerror(err_assign); }
	| tBASE '=' tNUMBER		{
		/*
		 * Validate the requested base, not the current one: the
		 * current base is always one of the legal values, so testing
		 * it would accept any number.
		 */
		switch($3)
		{
		case 8:
		case 10:
		case 16:
			base = $3;
			break;
		default:
			xyyerror("Numberbase must be 8, 10 or 16");
		}
	}
	| tBASE '=' error		{ xyyerror(err_number); }
	| tBASE error			{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * SeverityNames mapping
 */
smaps	: smap
	| smaps smap
	| error		{ xyyerror(err_ident); }
	;

/* name '=' value [':' alias]; the value must fit in two bits */
smap	: token '=' tNUMBER alias {
		token_t *sev = $1;
		sev->token = $3;
		sev->alias = $4;
		if(($3 & (~0x3)) != 0)
			xyyerror("Severity value out of range (0x%08x > 0x3)", $3);
		do_add_token(tok_severity, sev, "severity");
	}
	| token '=' error	{ xyyerror(err_number); }
	| token error		{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * FacilityNames mapping
 */
fmaps	: fmap
	| fmaps fmap
	| error		{ xyyerror(err_ident); }
	;

/* name '=' value [':' alias]; the value must fit in twelve bits */
fmap	: token '=' tNUMBER alias {
		token_t *fac = $1;
		fac->token = $3;
		fac->alias = $4;
		if(($3 & (~0xfff)) != 0)
			xyyerror("Facility value out of range (0x%08x > 0xfff)", $3);
		do_add_token(tok_facility, fac, "facility");
	}
	| token '=' error	{ xyyerror(err_number); }
	| token error		{ xyyerror(err_assign); }
	;
/* Optional ':' identifier suffix of a severity/facility mapping; NULL if absent */
alias	: /* Empty */	{ $$ = NULL; }
	| ':' tIDENT	{ $$ = $2; }
	| ':' error	{ xyyerror(err_ident); }
	;
/*----------------------------------------------------------------------
 * LanguageNames mapping
 */
lmaps	: lmap
	| lmaps lmap
	| error		{ xyyerror(err_ident); }
	;

/*
 * name '=' langid ':' filename [':' codepage]
 * The empty setfile rule asks the scanner to return the next
 * identifier as a filename (tFILE). The filename is stored in the
 * token's alias field.
 */
lmap	: token '=' tNUMBER setfile ':' tFILE optcp {
		token_t *lan = $1;
		lan->token = $3;
		lan->alias = $6;
		lan->codepage = $7;
		do_add_token(tok_language, lan, "language");
		/* Warn only when neither the built-in table nor a codepage mapping knows the language */
		if(!(find_language($3) || find_cpxlat($3)))
			yywarning("Language 0x%x not built-in, using codepage %d; use explicit codepage to override", $3, WMC_DEFAULT_CODEPAGE);
	}
	| token '=' tNUMBER setfile ':' error	{ xyyerror("Filename expected"); }
	| token '=' tNUMBER error		{ xyyerror(err_colon); }
	| token '=' error			{ xyyerror(err_number); }
	| token error				{ xyyerror(err_assign); }
	;

/* Optional explicit codepage; 0 means none was given */
optcp	: /* Empty */	{ $$ = 0; }
	| ':' tNUMBER	{ $$ = $2; }
	| ':' error	{ xyyerror("Codepage-number expected"); }
	;
/*----------------------------------------------------------------------
 * Codepages mapping
 */
cmaps	: cmap
	| cmaps cmap
	| error		{ xyyerror(err_ident); }
	;

/*
 * language '=' input-codepage ':' output-codepage
 * A language may be mapped only once. A codepage of 0 is accepted
 * without lookup; any other value must be known to find_codepage().
 */
cmap	: clan '=' tNUMBER ':' tNUMBER {
		static const char err_nocp[] = "Codepage %d not builtin; cannot convert";
		if(find_cpxlat($1) != NULL)
			xyyerror("Codepage translation already defined for language 0x%x", $1);
		if($3 != 0 && !find_codepage($3))
			xyyerror(err_nocp, $3);
		if($5 != 0 && !find_codepage($5))
			xyyerror(err_nocp, $5);
		add_cpxlat($1, $3, $5);
	}
	| clan '=' tNUMBER ':' error	{ xyyerror(err_number); }
	| clan '=' tNUMBER error	{ xyyerror(err_colon); }
	| clan '=' error		{ xyyerror(err_number); }
	| clan error			{ xyyerror(err_assign); }
	;

/* A language given either numerically or by a previously defined language name */
clan	: tNUMBER	{ $$ = $1; }
	| tTOKEN	{
		token_t *lan = $1;
		if(lan->type != tok_language)
			xyyerror("Language name or code expected");
		$$ = lan->token;
	}
	;
/*----------------------------------------------------------------------
 * Message-definition parsing
 */

/*
 * The mid-rule action checks the ID for duplicates as soon as the
 * severity/facility/symbol headers are known. It occupies position $3,
 * so the message bodies are $4.
 */
msg	: msgid sevfacsym { test_id($1); } bodies { $$ = complete_msg($4, $1); }
	;

/* MessageId '=' [number | '+' number]; the result must fit in 16 bits */
msgid	: tMSGID '=' id {
		if(($3 & (~0xffff)) != 0)
			xyyerror("Message ID value out of range (0x%08x > 0xffff)", $3);
		$$ = $3;
	}
	| tMSGID error	{ xyyerror(err_assign); }
	;

/*
 * Empty: previous ID plus one; number: absolute ID;
 * '+' number: previous ID plus that increment.
 * last_id is updated in every case.
 */
id	: /* Empty */	{ $$ = ++last_id; }
	| tNUMBER	{ $$ = last_id = $1; }
	| '+' tNUMBER	{ $$ = last_id += $2; }
	| '+' error	{ xyyerror(err_number); }
	;
/*
 * Any sequence of Severity=, Facility= and SymbolicName= headers.
 * The empty alternative resets the have_* flags for the new message;
 * each header may then appear at most once.
 */
sevfacsym: /* Empty */	{ have_sev = have_fac = have_sym = 0; }
	| sevfacsym sev	{
		if(have_sev)
			xyyerror("Severity already defined");
		have_sev = 1;
	}
	| sevfacsym fac	{
		if(have_fac)
			xyyerror("Facility already defined");
		have_fac = 1;
	}
	| sevfacsym sym	{
		if(have_sym)
			xyyerror("Symbolname already defined");
		have_sym = 1;
	}
	;
/* SymbolicName '=' identifier; remembered in last_sym */
sym	: tSYMNAME '=' tIDENT	{ last_sym = $3; }
	| tSYMNAME '=' error	{ xyyerror(err_ident); }
	| tSYMNAME error	{ xyyerror(err_assign); }
	;

/*
 * Severity '=' name; the name must be a defined severity token.
 * NOTE(review): the type test dereferences the lookup result right
 * after the NULL check, so this relies on xyyerror() not returning
 * -- confirm.
 */
sev	: tSEVERITY '=' token {
		token_t *found = lookup_token($3->name);
		if(found == NULL)
			xyyerror("Undefined severityname");
		if(found->type != tok_severity)
			xyyerror("Identifier is not of class 'severity'");
		last_sev = found->token;
	}
	| tSEVERITY '=' error	{ xyyerror(err_ident); }
	| tSEVERITY error	{ xyyerror(err_assign); }
	;

/*
 * Facility '=' name; the name must be a defined facility token.
 * The same assumption about xyyerror() applies as for 'sev'.
 */
fac	: tFACILITY '=' token {
		token_t *found = lookup_token($3->name);
		if(found == NULL)
			xyyerror("Undefined facilityname");
		if(found->type != tok_facility)
			xyyerror("Identifier is not of class 'facility'");
		last_fac = found->token;
	}
	| tFACILITY '=' error	{ xyyerror(err_ident); }
	| tFACILITY error	{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * Message-text parsing
 */

/* One or more per-language texts, collected into a single msg_t */
bodies	: body		{ $$ = add_lanmsg(NULL, $1); }
	| bodies body	{ $$ = add_lanmsg($1, $2); }
	| error		{ xyyerror("'Language=...' (start of message text-definition) expected"); }
	;

/* Language header, then text lines up to the terminator; setline switches the scanner to whole-line mode */
body	: lang setline lines tMSGEND	{ $$ = new_lanmsg(&$1, $3); }
	;

/*
 * The newline is to be able to set the codepage
 * to the language based codepage for the next
 * message to be parsed.
 *
 * Codepage selection, in order of preference: an explicit codepage
 * mapping for the language, the codepage given with the language name,
 * the DOS codepage of a built-in language, and finally the default.
 */
lang	: tLANGUAGE setnl '=' token tNL {
		token_t *tok = lookup_token($4->name);
		cp_xlat_t *cpx;

		if(!tok)
			xyyerror("Undefined language");
		if(tok->type != tok_language)
			xyyerror("Identifier is not of class 'language'");

		cpx = find_cpxlat(tok->token);
		if(cpx)
		{
			$$.codepage = cpx->cpin;
		}
		else if(tok->codepage)
		{
			$$.codepage = tok->codepage;
		}
		else
		{
			const language_t *lan = find_language(tok->token);
			if(lan)
			{
				/* The default seems to be to use the DOS codepage... */
				$$.codepage = lan->doscp;
			}
			else
			{
				/* Just set default; warning was given while parsing languagenames */
				$$.codepage = WMC_DEFAULT_CODEPAGE;
			}
		}
		set_codepage($$.codepage);
		$$.language = tok->token;
	}
	| tLANGUAGE setnl '=' token error	{ xyyerror("Missing newline"); }
	| tLANGUAGE setnl '=' error		{ xyyerror(err_ident); }
	| tLANGUAGE error			{ xyyerror(err_assign); }
	;

/* The message text: one or more lines concatenated by merge() */
lines	: tLINE		{ $$ = $1; }
	| lines tLINE	{ $$ = merge($1, $2); }
	| error		{ xyyerror(err_msg); }
	| lines error	{ xyyerror(err_msg); }
	;
/*----------------------------------------------------------------------
 * Helper rules
 */

/*
 * Either an already known token or a fresh identifier wrapped in a
 * new token_t. Only the name is set on a fresh token.
 * NOTE(review): other fields (e.g. 'fixed', 'type') are read later
 * without being set here, so this presumably relies on xmalloc()
 * returning zeroed memory -- confirm.
 */
token	: tIDENT	{
		token_t *fresh = xmalloc(sizeof(token_t));
		fresh->name = $1;
		$$ = fresh;
	}
	| tTOKEN	{ $$ = $1; }
	;

/* Empty rules used only for their side effect on the scanner switches */
setnl	: /* Empty */	{ want_nl = 1; }
	;

setline	: /* Empty */	{ want_line = 1; }
	;

setfile	: /* Empty */	{ want_file = 1; }
	;

%%
407 static WCHAR *merge(WCHAR *s1, WCHAR *s2)
409 int l1 = unistrlen(s1);
410 int l2 = unistrlen(s2);
411 s1 = xrealloc(s1, (l1 + l2 + 1) * sizeof(*s1));
412 unistrcpy(s1+l1, s2);
413 free(s2);
414 return s1;
417 static void do_add_token(tok_e type, token_t *tok, const char *code)
419 token_t *tp = lookup_token(tok->name);
420 if(tp)
422 if(tok->type != type)
423 yywarning("Type change in token");
424 if(tp != tok)
425 xyyerror("Overlapping token not the same");
426 /* else its already defined and changed */
427 if(tok->fixed)
428 xyyerror("Redefinition of %s", code);
429 tok->fixed = 1;
431 else
433 add_token(type, tok->name, tok->token, tok->codepage, tok->alias, 1);
434 free(tok);
438 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg)
440 lanmsg_t *lmp = (lanmsg_t *)xmalloc(sizeof(lanmsg_t));
441 lmp->lan = lcp->language;
442 lmp->cp = lcp->codepage;
443 lmp->msg = msg;
444 lmp->len = unistrlen(msg) + 1; /* Include termination */
445 if(lmp->len > 4096)
446 yywarning("Message exceptionally long; might be a missing termination");
447 return lmp;
450 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg)
452 int i;
453 if(!msg)
454 msg = xmalloc(sizeof(msg_t));
455 msg->msgs = xrealloc(msg->msgs, (msg->nmsgs+1) * sizeof(*(msg->msgs)));
456 msg->msgs[msg->nmsgs] = lanmsg;
457 msg->nmsgs++;
458 for(i = 0; i < msg->nmsgs-1; i++)
460 if(msg->msgs[i]->lan == lanmsg->lan)
461 xyyerror("Message for language 0x%x already defined", lanmsg->lan);
463 return msg;
466 static int sort_lanmsg(const void *p1, const void *p2)
468 return (*(lanmsg_t **)p1)->lan - (*(lanmsg_t **)p2)->lan;
471 static msg_t *complete_msg(msg_t *mp, int id)
473 assert(mp != NULL);
474 mp->id = id;
475 if(have_sym)
476 mp->sym = last_sym;
477 else
478 xyyerror("No symbolic name defined for message id %d", id);
479 mp->sev = last_sev;
480 mp->fac = last_fac;
481 qsort(mp->msgs, mp->nmsgs, sizeof(*(mp->msgs)), sort_lanmsg);
482 mp->realid = id | (last_sev << 30) | (last_fac << 16);
483 if(custombit)
484 mp->realid |= 1 << 29;
485 mp->base = base;
486 mp->cast = cast;
487 return mp;
490 static void add_node(node_e type, void *p)
492 node_t *ndp = (node_t *)xmalloc(sizeof(node_t));
493 ndp->type = type;
494 ndp->u.all = p;
496 if(nodetail)
498 ndp->prev = nodetail;
499 nodetail->next = ndp;
500 nodetail = ndp;
502 else
504 nodehead = nodetail = ndp;
508 static void test_id(int id)
510 node_t *ndp;
511 for(ndp = nodehead; ndp; ndp = ndp->next)
513 if(ndp->type != nd_msg)
514 continue;
515 if(ndp->u.msg->id == id && ndp->u.msg->sev == last_sev && ndp->u.msg->fac == last_fac)
516 xyyerror("MessageId %d with facility 0x%x and severity 0x%x already defined", id, last_fac, last_sev);
520 static int check_languages(node_t *head)
522 static char err_missing[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
523 node_t *ndp;
524 int nm = 0;
525 msg_t *msg = NULL;
527 for(ndp = head; ndp; ndp = ndp->next)
529 if(ndp->type != nd_msg)
530 continue;
531 if(!nm)
533 msg = ndp->u.msg;
535 else
537 int i;
538 msg_t *m1;
539 msg_t *m2;
540 if(ndp->u.msg->nmsgs > msg->nmsgs)
542 m1 = ndp->u.msg;
543 m2 = msg;
545 else
547 m1 = msg;
548 m2 = ndp->u.msg;
551 for(i = 0; i < m1->nmsgs; i++)
553 if(i > m2->nmsgs)
554 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
555 else if(m1->msgs[i]->lan < m2->msgs[i]->lan)
556 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
557 else if(m1->msgs[i]->lan > m2->msgs[i]->lan)
558 error(err_missing, m2->msgs[i]->lan, m1->id, m1->fac, m1->sev);
561 nm++;
563 return nm;
566 #define MSGRID(x) ((*(msg_t **)(x))->realid)
567 static int sort_msg(const void *p1, const void *p2)
569 return MSGRID(p1) > MSGRID(p2) ? 1 : (MSGRID(p1) == MSGRID(p2) ? 0 : -1);
570 /* return (*(msg_t **)p1)->realid - (*(msg_t **)p1)->realid; */
574 * block_messages() basically transposes the messages
575 * from ID/language based list to a language/ID
576 * based list.
578 static lan_blk_t *block_messages(node_t *head)
580 lan_blk_t *lbp;
581 lan_blk_t *lblktail = NULL;
582 lan_blk_t *lblkhead = NULL;
583 msg_t **msgtab = NULL;
584 node_t *ndp;
585 int nmsg = 0;
586 int i;
587 int nl;
588 int factor = unicodeout ? 2 : 1;
590 for(ndp = head; ndp; ndp = ndp->next)
592 if(ndp->type != nd_msg)
593 continue;
594 msgtab = xrealloc(msgtab, (nmsg+1) * sizeof(*msgtab));
595 msgtab[nmsg++] = ndp->u.msg;
598 assert(nmsg != 0);
599 qsort(msgtab, nmsg, sizeof(*msgtab), sort_msg);
601 for(nl = 0; nl < msgtab[0]->nmsgs; nl++) /* This should be equal for all after check_languages() */
603 lbp = xmalloc(sizeof(lan_blk_t));
605 if(!lblktail)
607 lblkhead = lblktail = lbp;
609 else
611 lblktail->next = lbp;
612 lbp->prev = lblktail;
613 lblktail = lbp;
615 lbp->nblk = 1;
616 lbp->blks = xmalloc(sizeof(*lbp->blks));
617 lbp->blks[0].idlo = msgtab[0]->realid;
618 lbp->blks[0].idhi = msgtab[0]->realid;
619 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
620 lbp->blks[0].size = ((factor * msgtab[0]->msgs[nl]->len + 3) & ~3) + 4;
621 lbp->blks[0].msgs = xmalloc(sizeof(*lbp->blks[0].msgs));
622 lbp->blks[0].nmsg = 1;
623 lbp->blks[0].msgs[0] = msgtab[0]->msgs[nl];
624 lbp->lan = msgtab[0]->msgs[nl]->lan;
626 for(i = 1; i < nmsg; i++)
628 block_t *blk = &(lbp->blks[lbp->nblk-1]);
629 if(msgtab[i]->realid == blk->idhi+1)
631 blk->size += ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
632 blk->idhi++;
633 blk->msgs = xrealloc(blk->msgs, (blk->nmsg+1) * sizeof(*blk->msgs));
634 blk->msgs[blk->nmsg++] = msgtab[i]->msgs[nl];
636 else
638 lbp->nblk++;
639 lbp->blks = xrealloc(lbp->blks, lbp->nblk * sizeof(*lbp->blks));
640 blk = &(lbp->blks[lbp->nblk-1]);
641 blk->idlo = msgtab[i]->realid;
642 blk->idhi = msgtab[i]->realid;
643 blk->size = ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
644 blk->msgs = xmalloc(sizeof(*blk->msgs));
645 blk->nmsg = 1;
646 blk->msgs[0] = msgtab[i]->msgs[nl];
650 free(msgtab);
651 return lblkhead;
654 static int sc_xlat(const void *p1, const void *p2)
656 return ((cp_xlat_t *)p1)->lan - ((cp_xlat_t *)p2)->lan;
659 static void add_cpxlat(int lan, int cpin, int cpout)
661 cpxlattab = xrealloc(cpxlattab, (ncpxlattab+1) * sizeof(*cpxlattab));
662 cpxlattab[ncpxlattab].lan = lan;
663 cpxlattab[ncpxlattab].cpin = cpin;
664 cpxlattab[ncpxlattab].cpout = cpout;
665 ncpxlattab++;
666 qsort(cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);
669 static cp_xlat_t *find_cpxlat(int lan)
671 cp_xlat_t t;
673 if(!cpxlattab) return NULL;
675 t.lan = lan;
676 return (cp_xlat_t *)bsearch(&t, cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);