push cc8bc80451cc24f4d7cf75168b569f0ebfe19547
[wine/hacks.git] / tools / wmc / mcy.y
blob2887df56b6d1a322fa2fd3d7292190d1425a83eb
1 /*
2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20 * NOTES:
22 * The basic grammar of the file is yet another example of, humpf,
23 * design. There is a mix of context-insensitive and -sensitive
24 * stuff, which makes it rather complicated.
25 * The header definitions are all context-insensitive because they have
26 * delimited arguments, whereas the message headers are (semi-) context-
27 * sensitive and the messages themselves are, well, RFC82[12] delimited.
28 * This mixture seems to originate from the time that ms and ibm were
29 * good friends and developing os/2 according to the "compatibility"
30 * switch and reading some comments here and there.
32 * I'll ignore most of the complications and concentrate on the concept
33 * which allows me to use yacc. Basically, everything is context-
34 * insensitive now, with the exception of the message-text itself and
35 * the preceding language declaration.
41 #include "config.h"
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <assert.h>
47 #include "utils.h"
48 #include "wmc.h"
49 #include "lang.h"
51 static const char err_syntax[] = "Syntax error";
52 static const char err_number[] = "Number expected";
53 static const char err_ident[] = "Identifier expected";
54 static const char err_assign[] = "'=' expected";
55 static const char err_popen[] = "'(' expected";
56 static const char err_pclose[] = "')' expected";
57 static const char err_colon[] = "':' expected";
58 static const char err_msg[] = "Message expected";
60 /* Scanner switches */
61 int want_nl = 0; /* Request next newline */
62 int want_line = 0; /* Request next complete line */
63 int want_file = 0; /* Request next ident as filename */
65 node_t *nodehead = NULL; /* The list of all parsed elements */
66 static node_t *nodetail = NULL;
67 lan_blk_t *lanblockhead; /* List of parsed elements transposed */
69 static int base = 16; /* Current printout base to use (8, 10 or 16) */
70 static WCHAR *cast = NULL; /* Current typecast to use */
72 static int last_id = 0; /* The last message ID parsed */
73 static int last_sev = 0; /* Last severity code parsed */
74 static int last_fac = 0; /* Last facility code parsed */
75 static WCHAR *last_sym = NULL;/* Last alias symbol parsed */
76 static int have_sev; /* Set if severity parsed for current message */
77 static int have_fac; /* Set if facility parsed for current message */
78 static int have_sym; /* Set if symbol parsed for current message */
80 static cp_xlat_t *cpxlattab = NULL; /* Codepage translation table */
81 static int ncpxlattab = 0;
83 /* Prototypes */
84 static WCHAR *merge(WCHAR *s1, WCHAR *s2);
85 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg);
86 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg);
87 static msg_t *complete_msg(msg_t *msg, int id);
88 static void add_node(node_e type, void *p);
89 static void do_add_token(tok_e type, token_t *tok, const char *code);
90 static void test_id(int id);
91 static int check_languages(node_t *head);
92 static lan_blk_t *block_messages(node_t *head);
93 static void add_cpxlat(int lan, int cpin, int cpout);
94 static cp_xlat_t *find_cpxlat(int lan);
/* Semantic value carried by terminals and nonterminals */
%union {
	WCHAR		*str;	/* tIDENT, tLINE, tFILE, tCOMMENT, alias, lines */
	unsigned	num;	/* tNUMBER, optcp, id, msgid, clan */
	token_t		*tok;	/* tTOKEN, token */
	lanmsg_t	*lmp;	/* body */
	msg_t		*msg;	/* bodies, msg */
	lan_cp_t	lcp;	/* lang */
}
/* Keywords without a semantic value (declaration order kept as-is) */
%token	tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
%token	tTYPEDEF tNL tSYMNAME tMSGEND
%token	tSEVERITY tFACILITY tLANGUAGE tMSGID

/* Terminals that carry a value */
%token	<str>	tIDENT tLINE tFILE tCOMMENT
%token	<num>	tNUMBER
%token	<tok>	tTOKEN

/* Value types of the nonterminals */
%type	<str>	alias lines
%type	<num>	optcp id msgid clan
%type	<tok>	token
%type	<lmp>	body
%type	<msg>	bodies msg
%type	<lcp>	lang
/*
 * Start symbol. Once all items are parsed, check_languages() is run over
 * the node list (a zero result means there were no messages) and the
 * list is transposed into lanblockhead by block_messages().
 */
file	: items	{
		const int msg_count = check_languages(nodehead);

		if(msg_count == 0)
		{
			xyyerror("No messages defined\n");
		}
		lanblockhead = block_messages(nodehead);
	}
	;
/* One or more declarations */
items	: decl
	| items decl
	;

/* Messages and comments are appended to the node list; globals only set state */
decl	: global
	| msg
		{ add_node(nd_msg, $1); }
	| tCOMMENT
		{ add_node(nd_comment, $1); }
	| error
		{ xyyerror(err_syntax); /* `Catch all' error */ }
	;
/*
 * Header definitions: name maps (severity, facility, language, codepage),
 * the typecast identifier and the output number base.
 */
global	: tSEVNAMES '=' '(' smaps ')'
	| tSEVNAMES '=' '(' smaps error	{ xyyerror(err_pclose); }
	| tSEVNAMES '=' error		{ xyyerror(err_popen); }
	| tSEVNAMES error		{ xyyerror(err_assign); }
	| tFACNAMES '=' '(' fmaps ')'
	| tFACNAMES '=' '(' fmaps error	{ xyyerror(err_pclose); }
	| tFACNAMES '=' error		{ xyyerror(err_popen); }
	| tFACNAMES error		{ xyyerror(err_assign); }
	| tLANNAMES '=' '(' lmaps ')'
	| tLANNAMES '=' '(' lmaps error	{ xyyerror(err_pclose); }
	| tLANNAMES '=' error		{ xyyerror(err_popen); }
	| tLANNAMES error		{ xyyerror(err_assign); }
	| tCODEPAGE '=' '(' cmaps ')'
	| tCODEPAGE '=' '(' cmaps error	{ xyyerror(err_pclose); }
	| tCODEPAGE '=' error		{ xyyerror(err_popen); }
	| tCODEPAGE error		{ xyyerror(err_assign); }
	| tTYPEDEF '=' tIDENT		{ cast = $3; }
	/* The typedef takes an identifier, so report that (not a number) */
	| tTYPEDEF '=' error		{ xyyerror(err_ident); }
	| tTYPEDEF error		{ xyyerror(err_assign); }
	| tBASE '=' tNUMBER		{
		/*
		 * Validate the requested base, not the current one: switching
		 * on 'base' accepted any number because 'base' starts out valid.
		 */
		switch($3)
		{
		case 8:
		case 10:
		case 16:
			base = $3;
			break;
		default:
			xyyerror("Numberbase must be 8, 10 or 16\n");
		}
	}
	| tBASE '=' error		{ xyyerror(err_number); }
	| tBASE error			{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * SeverityNames mapping
 */
smaps	: smap
	| smaps smap
	| error
		{ xyyerror(err_ident); }
	;

/* name '=' value [':' alias]; the value may only use the two low bits */
smap	: token '=' tNUMBER alias
		{
			$1->token = $3;
			$1->alias = $4;
			if(($3 & ~0x3) != 0)
			{
				xyyerror("Severity value out of range (0x%08x > 0x3)\n", $3);
			}
			do_add_token(tok_severity, $1, "severity");
		}
	| token '=' error
		{ xyyerror(err_number); }
	| token error
		{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * FacilityNames mapping
 */
fmaps	: fmap
	| fmaps fmap
	| error
		{ xyyerror(err_ident); }
	;

/* name '=' value [':' alias]; the value may only use the low twelve bits */
fmap	: token '=' tNUMBER alias
		{
			$1->token = $3;
			$1->alias = $4;
			if(($3 & ~0xfff) != 0)
			{
				xyyerror("Facility value out of range (0x%08x > 0xfff)\n", $3);
			}
			do_add_token(tok_facility, $1, "facility");
		}
	| token '=' error
		{ xyyerror(err_number); }
	| token error
		{ xyyerror(err_assign); }
	;
/* Optional ':' identifier after a severity or facility value; NULL when absent */
alias	: /* Empty */
		{ $$ = NULL; }
	| ':' tIDENT
		{ $$ = $2; }
	| ':' error
		{ xyyerror(err_ident); }
	;
/*----------------------------------------------------------------------
 * LanguageNames mapping
 */
lmaps	: lmap
	| lmaps lmap
	| error
		{ xyyerror(err_ident); }
	;

/*
 * name '=' number ':' filename [':' codepage]
 * 'setfile' raises want_file before the filename is scanned.
 */
lmap	: token '=' tNUMBER setfile ':' tFILE optcp
		{
			$1->token = $3;
			$1->alias = $6;
			$1->codepage = $7;
			do_add_token(tok_language, $1, "language");
			/* Warn when neither a built-in language nor a codepage mapping is known */
			if(!(find_language($3) || find_cpxlat($3)))
			{
				mcy_warning("Language 0x%x not built-in, using codepage %d; use explicit codepage to override\n", $3, WMC_DEFAULT_CODEPAGE);
			}
		}
	| token '=' tNUMBER setfile ':' error
		{ xyyerror("Filename expected\n"); }
	| token '=' tNUMBER error
		{ xyyerror(err_colon); }
	| token '=' error
		{ xyyerror(err_number); }
	| token error
		{ xyyerror(err_assign); }
	;
/* Optional ':' codepage-number after a language filename; 0 when absent */
optcp	: /* Empty */
		{ $$ = 0; }
	| ':' tNUMBER
		{ $$ = $2; }
	| ':' error
		{ xyyerror("Codepage-number expected\n"); }
	;
/*----------------------------------------------------------------------
 * Codepages mapping
 */
cmaps	: cmap
	| cmaps cmap
	| error
		{ xyyerror(err_ident); }
	;

/*
 * language '=' input-codepage ':' output-codepage
 * A language may be mapped only once; a non-zero codepage must be
 * known to find_codepage().
 */
cmap	: clan '=' tNUMBER ':' tNUMBER
		{
			static const char err_nocp[] = "Codepage %d not builtin; cannot convert";

			if(find_cpxlat($1))
			{
				xyyerror("Codepage translation already defined for language 0x%x\n", $1);
			}
			if($3 != 0 && !find_codepage($3))
			{
				xyyerror(err_nocp, $3);
			}
			if($5 != 0 && !find_codepage($5))
			{
				xyyerror(err_nocp, $5);
			}
			add_cpxlat($1, $3, $5);
		}
	| clan '=' tNUMBER ':' error
		{ xyyerror(err_number); }
	| clan '=' tNUMBER error
		{ xyyerror(err_colon); }
	| clan '=' error
		{ xyyerror(err_number); }
	| clan error
		{ xyyerror(err_assign); }
	;
/* Language of a codepage mapping: a plain number or a defined language name */
clan	: tNUMBER
		{ $$ = $1; }
	| tTOKEN
		{
			if($1->type != tok_language)
			{
				xyyerror("Language name or code expected\n");
			}
			$$ = $1->token;
		}
	;
/*----------------------------------------------------------------------
 * Message-definition parsing
 */
/*
 * The mid-rule action occupies position $3, so 'bodies' is $4. The id is
 * checked for duplicates before the message texts are parsed.
 */
msg	: msgid sevfacsym
		{ test_id($1); }
	  bodies
		{ $$ = complete_msg($4, $1); }
	;
/* MessageId '=' [number | '+' number]; the result must fit in 16 bits */
msgid	: tMSGID '=' id
		{
			if(($3 & ~0xffff) != 0)
			{
				xyyerror("Message ID value out of range (0x%08x > 0xffff)\n", $3);
			}
			$$ = $3;
		}
	| tMSGID error
		{ xyyerror(err_assign); }
	;

/* Empty: previous id plus one; number: absolute id; '+' number: relative id */
id	: /* Empty */
		{
			last_id++;
			$$ = last_id;
		}
	| tNUMBER
		{
			last_id = $1;
			$$ = last_id;
		}
	| '+' tNUMBER
		{
			last_id += $2;
			$$ = last_id;
		}
	| '+' error
		{ xyyerror(err_number); }
	;
/*
 * Any sequence of Severity/Facility/SymbolicName settings, each at most
 * once per message. The empty alternative clears the per-message flags.
 */
sevfacsym: /* Empty */
		{
			have_sev = 0;
			have_fac = 0;
			have_sym = 0;
		}
	| sevfacsym sev
		{
			if(have_sev)
				xyyerror("Severity already defined\n");
			have_sev = 1;
		}
	| sevfacsym fac
		{
			if(have_fac)
				xyyerror("Facility already defined\n");
			have_fac = 1;
		}
	| sevfacsym sym
		{
			if(have_sym)
				xyyerror("Symbolname already defined\n");
			have_sym = 1;
		}
	;

/* SymbolicName '=' identifier; remembered in last_sym */
sym	: tSYMNAME '=' tIDENT
		{ last_sym = $3; }
	| tSYMNAME '=' error
		{ xyyerror(err_ident); }
	| tSYMNAME error
		{ xyyerror(err_assign); }
	;
/* Severity '=' name; the name must resolve to a token of class 'severity' */
sev	: tSEVERITY '=' token
		{
			token_t *sevtok = lookup_token($3->name);

			if(sevtok == NULL)
			{
				xyyerror("Undefined severityname\n");
			}
			if(sevtok->type != tok_severity)
			{
				xyyerror("Identifier is not of class 'severity'\n");
			}
			last_sev = sevtok->token;
		}
	| tSEVERITY '=' error
		{ xyyerror(err_ident); }
	| tSEVERITY error
		{ xyyerror(err_assign); }
	;
/* Facility '=' name; the name must resolve to a token of class 'facility' */
fac	: tFACILITY '=' token
		{
			token_t *factok = lookup_token($3->name);

			if(factok == NULL)
			{
				xyyerror("Undefined facilityname\n");
			}
			if(factok->type != tok_facility)
			{
				xyyerror("Identifier is not of class 'facility'\n");
			}
			last_fac = factok->token;
		}
	| tFACILITY '=' error
		{ xyyerror(err_ident); }
	| tFACILITY error
		{ xyyerror(err_assign); }
	;
/*----------------------------------------------------------------------
 * Message-text parsing
 */
/* One or more per-language texts, collected into a single msg_t */
bodies	: body
		{ $$ = add_lanmsg(NULL, $1); }
	| bodies body
		{ $$ = add_lanmsg($1, $2); }
	| error
		{ xyyerror("'Language=...' (start of message text-definition) expected\n"); }
	;

/* 'setline' ($2) raises want_line before the text lines ($3) are scanned */
body	: lang setline lines tMSGEND
		{ $$ = new_lanmsg(&$1, $3); }
	;
/*
 * The newline is required so that the codepage can be switched to the
 * one belonging to the language before the next message text is parsed.
 *
 * Codepage selection order: an explicit codepage mapping for the
 * language, then the codepage given with the language name, then the
 * built-in language's DOS codepage, and finally WMC_DEFAULT_CODEPAGE.
 */
lang	: tLANGUAGE setnl '=' token tNL
		{
			token_t *lantok = lookup_token($4->name);
			cp_xlat_t *xlat;

			if(lantok == NULL)
			{
				xyyerror("Undefined language\n");
			}
			if(lantok->type != tok_language)
			{
				xyyerror("Identifier is not of class 'language'\n");
			}

			xlat = find_cpxlat(lantok->token);
			if(xlat != NULL)
			{
				$$.codepage = xlat->cpin;
			}
			else if(lantok->codepage)
			{
				$$.codepage = lantok->codepage;
			}
			else
			{
				const language_t *builtin = find_language(lantok->token);

				if(builtin != NULL)
				{
					/* The default seems to be to use the DOS codepage... */
					$$.codepage = builtin->doscp;
				}
				else
				{
					/* Just set default; warning was given while parsing languagenames */
					$$.codepage = WMC_DEFAULT_CODEPAGE;
				}
			}
			set_codepage($$.codepage);
			$$.language = lantok->token;
		}
	| tLANGUAGE setnl '=' token error
		{ xyyerror("Missing newline\n"); }
	| tLANGUAGE setnl '=' error
		{ xyyerror(err_ident); }
	| tLANGUAGE error
		{ xyyerror(err_assign); }
	;
/* Message text: one or more raw lines joined into a single string by merge() */
lines	: tLINE
		{ $$ = $1; }
	| lines tLINE
		{ $$ = merge($1, $2); }
	| error
		{ xyyerror(err_msg); }
	| lines error
		{ xyyerror(err_msg); }
	;
/*----------------------------------------------------------------------
 * Helper rules
 */
/* An unknown identifier gets a fresh zeroed token_t; a known token is passed through */
token	: tIDENT
		{
			$$ = xmalloc(sizeof(token_t));
			memset($$, 0, sizeof(*$$));
			$$->name = $1;
		}
	| tTOKEN
		{ $$ = $1; }
	;

/* Empty rules that only raise a scanner switch at a given point in a rule */
setnl	: /* Empty */
		{ want_nl = 1; }
	;

setline	: /* Empty */
		{ want_line = 1; }
	;

setfile	: /* Empty */
		{ want_file = 1; }
	;
406 static WCHAR *merge(WCHAR *s1, WCHAR *s2)
408 int l1 = unistrlen(s1);
409 int l2 = unistrlen(s2);
410 s1 = xrealloc(s1, (l1 + l2 + 1) * sizeof(*s1));
411 unistrcpy(s1+l1, s2);
412 free(s2);
413 return s1;
416 static void do_add_token(tok_e type, token_t *tok, const char *code)
418 token_t *tp = lookup_token(tok->name);
419 if(tp)
421 if(tok->type != type)
422 mcy_warning("Type change in token\n");
423 if(tp != tok)
424 xyyerror("Overlapping token not the same\n");
425 /* else its already defined and changed */
426 if(tok->fixed)
427 xyyerror("Redefinition of %s\n", code);
428 tok->fixed = 1;
430 else
432 add_token(type, tok->name, tok->token, tok->codepage, tok->alias, 1);
433 free(tok);
437 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg)
439 lanmsg_t *lmp = xmalloc(sizeof(lanmsg_t));
440 lmp->lan = lcp->language;
441 lmp->cp = lcp->codepage;
442 lmp->msg = msg;
443 lmp->len = unistrlen(msg) + 1; /* Include termination */
444 if(lmp->len > 4096)
445 mcy_warning("Message exceptionally long; might be a missing termination\n");
446 return lmp;
449 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg)
451 int i;
452 if(!msg)
454 msg = xmalloc(sizeof(msg_t));
455 memset( msg, 0, sizeof(*msg) );
457 msg->msgs = xrealloc(msg->msgs, (msg->nmsgs+1) * sizeof(*(msg->msgs)));
458 msg->msgs[msg->nmsgs] = lanmsg;
459 msg->nmsgs++;
460 for(i = 0; i < msg->nmsgs-1; i++)
462 if(msg->msgs[i]->lan == lanmsg->lan)
463 xyyerror("Message for language 0x%x already defined\n", lanmsg->lan);
465 return msg;
468 static int sort_lanmsg(const void *p1, const void *p2)
470 return (*(const lanmsg_t * const *)p1)->lan - (*(const lanmsg_t * const*)p2)->lan;
473 static msg_t *complete_msg(msg_t *mp, int id)
475 assert(mp != NULL);
476 mp->id = id;
477 if(have_sym)
478 mp->sym = last_sym;
479 else
480 xyyerror("No symbolic name defined for message id %d\n", id);
481 mp->sev = last_sev;
482 mp->fac = last_fac;
483 qsort(mp->msgs, mp->nmsgs, sizeof(*(mp->msgs)), sort_lanmsg);
484 mp->realid = id | (last_sev << 30) | (last_fac << 16);
485 if(custombit)
486 mp->realid |= 1 << 29;
487 mp->base = base;
488 mp->cast = cast;
489 return mp;
492 static void add_node(node_e type, void *p)
494 node_t *ndp = xmalloc(sizeof(node_t));
495 memset( ndp, 0, sizeof(*ndp) );
496 ndp->type = type;
497 ndp->u.all = p;
499 if(nodetail)
501 ndp->prev = nodetail;
502 nodetail->next = ndp;
503 nodetail = ndp;
505 else
507 nodehead = nodetail = ndp;
511 static void test_id(int id)
513 node_t *ndp;
514 for(ndp = nodehead; ndp; ndp = ndp->next)
516 if(ndp->type != nd_msg)
517 continue;
518 if(ndp->u.msg->id == id && ndp->u.msg->sev == last_sev && ndp->u.msg->fac == last_fac)
519 xyyerror("MessageId %d with facility 0x%x and severity 0x%x already defined\n", id, last_fac, last_sev);
523 static int check_languages(node_t *head)
525 static const char err_missing[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
526 node_t *ndp;
527 int nm = 0;
528 msg_t *msg = NULL;
530 for(ndp = head; ndp; ndp = ndp->next)
532 if(ndp->type != nd_msg)
533 continue;
534 if(!nm)
536 msg = ndp->u.msg;
538 else
540 int i;
541 msg_t *m1;
542 msg_t *m2;
543 if(ndp->u.msg->nmsgs > msg->nmsgs)
545 m1 = ndp->u.msg;
546 m2 = msg;
548 else
550 m1 = msg;
551 m2 = ndp->u.msg;
554 for(i = 0; i < m1->nmsgs; i++)
556 if(i > m2->nmsgs)
557 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
558 else if(m1->msgs[i]->lan < m2->msgs[i]->lan)
559 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
560 else if(m1->msgs[i]->lan > m2->msgs[i]->lan)
561 error(err_missing, m2->msgs[i]->lan, m1->id, m1->fac, m1->sev);
564 nm++;
566 return nm;
569 #define MSGRID(x) ((*(const msg_t * const*)(x))->realid)
570 static int sort_msg(const void *p1, const void *p2)
572 return MSGRID(p1) > MSGRID(p2) ? 1 : (MSGRID(p1) == MSGRID(p2) ? 0 : -1);
573 /* return (*(msg_t **)p1)->realid - (*(msg_t **)p1)->realid; */
577 * block_messages() basically transposes the messages
578 * from ID/language based list to a language/ID
579 * based list.
581 static lan_blk_t *block_messages(node_t *head)
583 lan_blk_t *lbp;
584 lan_blk_t *lblktail = NULL;
585 lan_blk_t *lblkhead = NULL;
586 msg_t **msgtab = NULL;
587 node_t *ndp;
588 int nmsg = 0;
589 int i;
590 int nl;
591 int factor = unicodeout ? 2 : 1;
593 for(ndp = head; ndp; ndp = ndp->next)
595 if(ndp->type != nd_msg)
596 continue;
597 msgtab = xrealloc(msgtab, (nmsg+1) * sizeof(*msgtab));
598 msgtab[nmsg++] = ndp->u.msg;
601 assert(nmsg != 0);
602 qsort(msgtab, nmsg, sizeof(*msgtab), sort_msg);
604 for(nl = 0; nl < msgtab[0]->nmsgs; nl++) /* This should be equal for all after check_languages() */
606 lbp = xmalloc(sizeof(lan_blk_t));
607 memset( lbp, 0, sizeof(*lbp) );
608 if(!lblktail)
610 lblkhead = lblktail = lbp;
612 else
614 lblktail->next = lbp;
615 lbp->prev = lblktail;
616 lblktail = lbp;
618 lbp->nblk = 1;
619 lbp->blks = xmalloc(sizeof(*lbp->blks));
620 lbp->blks[0].idlo = msgtab[0]->realid;
621 lbp->blks[0].idhi = msgtab[0]->realid;
622 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
623 lbp->blks[0].size = ((factor * msgtab[0]->msgs[nl]->len + 3) & ~3) + 4;
624 lbp->blks[0].msgs = xmalloc(sizeof(*lbp->blks[0].msgs));
625 lbp->blks[0].nmsg = 1;
626 lbp->blks[0].msgs[0] = msgtab[0]->msgs[nl];
627 lbp->lan = msgtab[0]->msgs[nl]->lan;
629 for(i = 1; i < nmsg; i++)
631 block_t *blk = &(lbp->blks[lbp->nblk-1]);
632 if(msgtab[i]->realid == blk->idhi+1)
634 blk->size += ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
635 blk->idhi++;
636 blk->msgs = xrealloc(blk->msgs, (blk->nmsg+1) * sizeof(*blk->msgs));
637 blk->msgs[blk->nmsg++] = msgtab[i]->msgs[nl];
639 else
641 lbp->nblk++;
642 lbp->blks = xrealloc(lbp->blks, lbp->nblk * sizeof(*lbp->blks));
643 blk = &(lbp->blks[lbp->nblk-1]);
644 blk->idlo = msgtab[i]->realid;
645 blk->idhi = msgtab[i]->realid;
646 blk->size = ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
647 blk->msgs = xmalloc(sizeof(*blk->msgs));
648 blk->nmsg = 1;
649 blk->msgs[0] = msgtab[i]->msgs[nl];
653 free(msgtab);
654 return lblkhead;
657 static int sc_xlat(const void *p1, const void *p2)
659 return ((const cp_xlat_t *)p1)->lan - ((const cp_xlat_t *)p2)->lan;
662 static void add_cpxlat(int lan, int cpin, int cpout)
664 cpxlattab = xrealloc(cpxlattab, (ncpxlattab+1) * sizeof(*cpxlattab));
665 cpxlattab[ncpxlattab].lan = lan;
666 cpxlattab[ncpxlattab].cpin = cpin;
667 cpxlattab[ncpxlattab].cpout = cpout;
668 ncpxlattab++;
669 qsort(cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);
672 static cp_xlat_t *find_cpxlat(int lan)
674 cp_xlat_t t;
676 if(!cpxlattab) return NULL;
678 t.lan = lan;
679 return (cp_xlat_t *)bsearch(&t, cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);