2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * The basic grammar of the file is yet another example of, humpf,
7 * design. There is a mix of context-insensitive and -sensitive
8 * stuff, which makes it rather complicated.
9 * The header definitions are all context-insensitive because they have
10 * delimited arguments, whereas the message headers are (semi-) context-
11 * sensitive and the messages themselves are, well, RFC82[12] delimited.
12 * This mixture seems to originate from the time that ms and ibm were
13 * good friends and developing os/2 according to the "compatibility"
14 * switch and reading some comments here and there.
16 * I'll ignore most of the complications and concentrate on the concept
17 * which allows me to use yacc. Basically, everything is context-
18 * insensitive now, with the exception of the message-text itself and
19 * the preceding language declaration.
35 static const char err_syntax
[] = "Syntax error";
36 static const char err_number
[] = "Number expected";
37 static const char err_ident
[] = "Identifier expected";
38 static const char err_assign
[] = "'=' expected";
39 static const char err_popen
[] = "'(' expected";
40 static const char err_pclose
[] = "')' expected";
41 static const char err_colon
[] = "':' expected";
42 static const char err_msg
[] = "Message expected";
44 /* Scanner switches */
45 int want_nl
= 0; /* Request next newline */
46 int want_line
= 0; /* Request next complete line */
47 int want_file
= 0; /* Request next ident as filename */
49 node_t
*nodehead
= NULL
; /* The list of all parsed elements */
50 static node_t
*nodetail
= NULL
; /* Last element of the nodehead list; add_node() appends here */
51 lan_blk_t
*lanblockhead
; /* List of parsed elements transposed */
53 static int base
= 16; /* Current printout base to use (8, 10 or 16) */
54 static WCHAR
*cast
= NULL
; /* Current typecast to use */
56 static int last_id
= 0; /* The last message ID parsed */
57 static int last_sev
= 0; /* Last severity code parsed */
58 static int last_fac
= 0; /* Last facility code parsed */
59 static WCHAR
*last_sym
= NULL
;/* Last alias symbol parsed */
60 static int have_sev
; /* Set if severity parsed for current message */
61 static int have_fac
; /* Set if facility parsed for current message */
62 static int have_sym
; /* Set if symbol parsed for current message */
64 static cp_xlat_t
*cpxlattab
= NULL
; /* Codepage translation table */
65 static int ncpxlattab
= 0; /* Number of entries in cpxlattab */
/* Forward declarations of the file-local helpers used by the grammar actions */
68 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
);
69 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
);
70 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
);
71 static msg_t
*complete_msg
(msg_t
*msg
, int id
);
72 static void add_node
(node_e type
, void *p
);
73 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
);
74 static void test_id
(int id
);
75 static int check_languages
(node_t
*head
);
76 static lan_blk_t
*block_messages
(node_t
*head
);
77 static void add_cpxlat
(int lan
, int cpin
, int cpout
);
78 static cp_xlat_t
*find_cpxlat
(int lan
);
93 %token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
94 %token tTYPEDEF tNL tSYMNAME tMSGEND
95 %token tSEVERITY tFACILITY tLANGUAGE tMSGID
96 %token
<str
> tIDENT tLINE tFILE tCOMMENT
100 %type
<str
> alias lines
101 %type
<num
> optcp id msgid clan
104 %type
<msg
> bodies msg
109 if
(!check_languages
(nodehead
))
110 xyyerror
("No messages defined");
111 lanblockhead
= block_messages
(nodehead
);
120 | msg
{ add_node
(nd_msg
, $1); }
121 | tCOMMENT
{ add_node
(nd_comment
, $1); }
122 |
error { xyyerror
(err_syntax
); /* `Catch all' error */ }
/*
 * Header-level definitions: the severity, facility, language and codepage
 * maps, the typedef and the output number base. After each keyword the
 * error alternatives report the first missing piece: '=', then '(' (or the
 * value), then ')'.
 */
125 global
: tSEVNAMES
'=' '(' smaps
')'
126 | tSEVNAMES
'=' '(' smaps
error { xyyerror
(err_pclose
); }
127 | tSEVNAMES
'=' error { xyyerror
(err_popen
); }
128 | tSEVNAMES
error { xyyerror
(err_assign
); }
129 | tFACNAMES
'=' '(' fmaps
')'
130 | tFACNAMES
'=' '(' fmaps
error { xyyerror
(err_pclose
); }
131 | tFACNAMES
'=' error { xyyerror
(err_popen
); }
132 | tFACNAMES
error { xyyerror
(err_assign
); }
133 | tLANNAMES
'=' '(' lmaps
')'
134 | tLANNAMES
'=' '(' lmaps
error { xyyerror
(err_pclose
); }
135 | tLANNAMES
'=' error { xyyerror
(err_popen
); }
136 | tLANNAMES
error { xyyerror
(err_assign
); }
137 | tCODEPAGE
'=' '(' cmaps
')'
138 | tCODEPAGE
'=' '(' cmaps
error { xyyerror
(err_pclose
); }
139 | tCODEPAGE
'=' error { xyyerror
(err_popen
); }
140 | tCODEPAGE
error { xyyerror
(err_assign
); }
/* The typedef value is kept in `cast` for later use as the typecast */
141 | tTYPEDEF
'=' tIDENT
{ cast
= $3; }
/*
 * NOTE(review): the alternative above expects tIDENT, yet this one reports
 * err_number ("Number expected"); err_ident looks like the intended
 * message -- confirm before changing.
 */
142 | tTYPEDEF
'=' error { xyyerror
(err_number
); }
143 | tTYPEDEF
error { xyyerror
(err_assign
); }
144 | tBASE
'=' tNUMBER
{
153 xyyerror
("Numberbase must be 8, 10 or 16");
156 | tBASE
'=' error { xyyerror
(err_number
); }
157 | tBASE
error { xyyerror
(err_assign
); }
160 /*----------------------------------------------------------------------
161 * SeverityNames mapping
165 |
error { xyyerror
(err_ident
); }
/*
 * One severity mapping: name '=' number, optionally followed by an alias.
 * The action holds a range diagnostic (its message names 0x3 as the limit)
 * and registers the token in class "severity" through do_add_token().
 */
168 smap
: token
'=' tNUMBER alias
{
172 xyyerror
("Severity value out of range (0x%08x > 0x3)", $3);
173 do_add_token
(tok_severity
, $1, "severity");
175 | token
'=' error { xyyerror
(err_number
); }
176 | token
error { xyyerror
(err_assign
); }
179 /*----------------------------------------------------------------------
180 * FacilityNames mapping
184 |
error { xyyerror
(err_ident
); }
/*
 * One facility mapping: name '=' number, optionally followed by an alias.
 * The action holds a range diagnostic (its message names 0xfff as the limit)
 * and registers the token in class "facility" through do_add_token().
 */
187 fmap
: token
'=' tNUMBER alias
{
191 xyyerror
("Facility value out of range (0x%08x > 0xfff)", $3);
192 do_add_token
(tok_facility
, $1, "facility");
194 | token
'=' error { xyyerror
(err_number
); }
195 | token
error { xyyerror
(err_assign
); }
/* Optional alias suffix: nothing yields NULL, ':' tIDENT yields the identifier */
198 alias
: /* Empty */ { $$
= NULL
; }
199 |
':' tIDENT
{ $$
= $2; }
200 |
':' error { xyyerror
(err_ident
); }
203 /*----------------------------------------------------------------------
204 * LanguageNames mapping
208 |
error { xyyerror
(err_ident
); }
/*
 * One language mapping: name '=' number ':' filename, optionally followed by
 * a codepage. The empty `setfile` rule sets want_file before the ':' so that
 * the scanner returns the next identifier as a filename. The action registers
 * the token in class "language" and warns when the language is found neither
 * by find_language() nor by find_cpxlat().
 */
211 lmap
: token
'=' tNUMBER setfile
':' tFILE optcp
{
215 do_add_token
(tok_language
, $1, "language");
216 if
(!find_language
($1->token
) && !find_cpxlat
($1->token
))
217 yywarning
("Language 0x%x not built-in, using codepage %d; use explicit codepage to override", $1->token
, WMC_DEFAULT_CODEPAGE
);
219 | token
'=' tNUMBER setfile
':' error { xyyerror
("Filename expected"); }
220 | token
'=' tNUMBER
error { xyyerror
(err_colon
); }
221 | token
'=' error { xyyerror
(err_number
); }
222 | token
error { xyyerror
(err_assign
); }
/* Optional codepage suffix: nothing yields 0, ':' tNUMBER yields the number */
225 optcp
: /* Empty */ { $$
= 0; }
226 |
':' tNUMBER
{ $$
= $2; }
227 |
':' error { xyyerror
("Codepage-number expected"); }
230 /*----------------------------------------------------------------------
235 |
error { xyyerror
(err_ident
); }
/*
 * One codepage mapping: language '=' number ':' number. The two numbers are
 * passed to add_cpxlat() as cpin and cpout; each non-zero one must be known
 * to find_codepage(). The action also carries a diagnostic for a language
 * whose translation was already defined.
 */
238 cmap
: clan
'=' tNUMBER
':' tNUMBER
{
239 static const char err_nocp
[] = "Codepage %d not builtin; cannot convert";
241 xyyerror
("Codepage translation already defined for language 0x%x", $1);
242 if
($3 && !find_codepage
($3))
243 xyyerror
(err_nocp
, $3);
244 if
($5 && !find_codepage
($5))
245 xyyerror
(err_nocp
, $5);
246 add_cpxlat
($1, $3, $5);
248 | clan
'=' tNUMBER
':' error { xyyerror
(err_number
); }
249 | clan
'=' tNUMBER
error { xyyerror
(err_colon
); }
250 | clan
'=' error { xyyerror
(err_number
); }
251 | clan
error { xyyerror
(err_assign
); }
/*
 * Language reference inside a codepage mapping. A plain number is passed
 * through; the remaining check requires a token of class tok_language.
 */
254 clan
: tNUMBER
{ $$
= $1; }
256 if
($1->type
!= tok_language
)
257 xyyerror
("Language name or code expected");
262 /*----------------------------------------------------------------------
263 * Message-definition parsing
/*
 * A complete message definition. The mid-rule action runs test_id() on the
 * message id right after the header settings; since that action occupies
 * position $3, the bodies are $4 when complete_msg() is called.
 */
265 msg
: msgid sevfacsym
{ test_id
($1); } bodies
{ $$
= complete_msg
($4, $1); }
/*
 * Message id header: tMSGID '=' id. The action carries a range diagnostic
 * whose message names 0xffff as the limit.
 */
268 msgid
: tMSGID
'=' id
{
270 xyyerror
("Message ID value out of range (0x%08x > 0xffff)", $3);
273 | tMSGID
error { xyyerror
(err_assign
); }
/*
 * Message id value, tracked in last_id:
 *   (empty)      -> previous id plus one
 *   tNUMBER      -> that number
 *   '+' tNUMBER  -> previous id plus that number
 */
276 id
: /* Empty */ { $$
= ++last_id
; }
277 | tNUMBER
{ $$
= last_id
= $1; }
278 |
'+' tNUMBER
{ $$
= last_id
+= $2; }
279 |
'+' error { xyyerror
(err_number
); }
/*
 * Any sequence of severity, facility and symbolic-name settings. The empty
 * alternative clears the have_* flags; each setting raises its flag and a
 * second occurrence of the same kind is reported as an error.
 */
282 sevfacsym: /* Empty */ { have_sev
= have_fac
= have_sym
= 0; }
283 | sevfacsym sev
{ if
(have_sev
) xyyerror
("Severity already defined"); have_sev
= 1; }
284 | sevfacsym fac
{ if
(have_fac
) xyyerror
("Facility already defined"); have_fac
= 1; }
285 | sevfacsym sym
{ if
(have_sym
) xyyerror
("Symbolname already defined"); have_sym
= 1; }
/* Symbolic name setting: stores the identifier in last_sym */
288 sym
: tSYMNAME
'=' tIDENT
{ last_sym
= $3; }
289 | tSYMNAME
'=' error { xyyerror
(err_ident
); }
290 | tSYMNAME
error { xyyerror
(err_assign
); }
/*
 * Severity setting: resolves the name with lookup_token(), requires a token
 * of class tok_severity and stores its value in last_sev.
 */
293 sev
: tSEVERITY
'=' token
{
294 token_t
*tok
= lookup_token
($3->name
);
296 xyyerror
("Undefined severityname");
297 if
(tok
->type
!= tok_severity
)
298 xyyerror
("Identifier is not of class 'severity'");
299 last_sev
= tok
->token
;
301 | tSEVERITY
'=' error { xyyerror
(err_ident
); }
302 | tSEVERITY
error { xyyerror
(err_assign
); }
/*
 * Facility setting: resolves the name with lookup_token(), requires a token
 * of class tok_facility and stores its value in last_fac.
 */
305 fac
: tFACILITY
'=' token
{
306 token_t
*tok
= lookup_token
($3->name
);
308 xyyerror
("Undefined facilityname");
309 if
(tok
->type
!= tok_facility
)
310 xyyerror
("Identifier is not of class 'facility'");
311 last_fac
= tok
->token
;
313 | tFACILITY
'=' error { xyyerror
(err_ident
); }
314 | tFACILITY
error { xyyerror
(err_assign
); }
317 /*----------------------------------------------------------------------
318 * Message-text parsing
/*
 * One or more per-language message bodies, collected with add_lanmsg().
 * The first body is added with a NULL message.
 */
320 bodies
: body
{ $$
= add_lanmsg
(NULL
, $1); }
321 | bodies body
{ $$
= add_lanmsg
($1, $2); }
322 |
error { xyyerror
("'Language=...' (start of message text-definition) expected"); }
/*
 * A single message text: language selector, text lines, end marker. The empty
 * `setline` rule sets want_line (request for complete lines) before the text.
 */
325 body
: lang setline lines tMSGEND
{ $$
= new_lanmsg
(&$1, $3); }
329 * The newline is to be able to set the codepage
330 * to the language based codepage for the next
331 * message to be parsed.
/*
 * Language selector that opens a message text: tLANGUAGE '=' name newline.
 * The empty `setnl` rule sets want_nl so that the scanner delivers the
 * newline. The action resolves the name with lookup_token(), requires class
 * tok_language and then selects the codepage, passing it to set_codepage()
 * and storing it in $$.codepage:
 *   - the cpin of an explicit translation found by find_cpxlat(), else
 *   - when the token has no codepage: WMC_DEFAULT_CODEPAGE or the doscp of
 *     the entry returned by find_language(), else
 *   - the token's own codepage.
 * $$.language receives the token value.
 */
333 lang
: tLANGUAGE setnl
'=' token tNL
{
334 token_t
*tok
= lookup_token
($4->name
);
337 xyyerror
("Undefined language");
338 if
(tok
->type
!= tok_language
)
339 xyyerror
("Identifier is not of class 'language'");
340 if
((cpx
= find_cpxlat
(tok
->token
)))
342 set_codepage
($$.codepage
= cpx
->cpin
);
344 else if
(!tok
->codepage
)
346 const language_t
*lan
= find_language
(tok
->token
);
349 /* Just set default; warning was given while parsing languagenames */
350 set_codepage
($$.codepage
= WMC_DEFAULT_CODEPAGE
);
354 /* The default seems to be to use the DOS codepage... */
355 set_codepage
($$.codepage
= lan
->doscp
);
359 set_codepage
($$.codepage
= tok
->codepage
);
360 $$.language
= tok
->token
;
362 | tLANGUAGE setnl
'=' token
error { xyyerror
("Missing newline"); }
363 | tLANGUAGE setnl
'=' error { xyyerror
(err_ident
); }
364 | tLANGUAGE
error { xyyerror
(err_assign
); }
/* Message text: one or more tLINE values, concatenated with merge() */
367 lines
: tLINE
{ $$
= $1; }
368 | lines tLINE
{ $$
= merge
($1, $2); }
369 |
error { xyyerror
(err_msg
); }
370 | lines
error { xyyerror
(err_msg
); }
373 /*----------------------------------------------------------------------
/*
 * A name used in a mapping or setting: either a tIDENT, wrapped in a newly
 * allocated token_t of which only `name` is assigned here, or a tTOKEN that
 * is passed through unchanged.
 */
376 token
: tIDENT
{ $$
= xmalloc
(sizeof
(token_t
)); $$
->name
= $1; }
377 | tTOKEN
{ $$
= $1; }
/* Empty marker rule: raises the want_nl scanner switch */
380 setnl
: /* Empty */ { want_nl
= 1; }
/* Empty marker rule: raises the want_line scanner switch */
383 setline
: /* Empty */ { want_line
= 1; }
/* Empty marker rule: raises the want_file scanner switch */
386 setfile
: /* Empty */ { want_file
= 1; }
/*
 * Append s2 to s1.
 * s1 is resized with xrealloc() to hold both strings plus the terminator,
 * so it may move; s2 is copied to the end of the old s1 contents.
 */
391 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
)
393 int l1
= unistrlen
(s1
);
394 int l2
= unistrlen
(s2
);
395 s1
= xrealloc
(s1
, (l1
+ l2
+ 1) * sizeof
(*s1
));
396 unistrcpy
(s1
+l1
, s2
);
/*
 * Register a parsed token in the class `type`.
 * The name is looked up first with lookup_token(). Diagnostics issued here
 * are a warning for a type change and errors for an overlapping token that
 * differs and for a redefinition; `code` is the class name inserted into the
 * redefinition message. The token is entered with add_token() using its
 * name, value, codepage and alias.
 */
401 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
)
403 token_t
*tp
= lookup_token
(tok
->name
);
406 if
(tok
->type
!= type
)
407 yywarning
("Type change in token");
409 xyyerror
("Overlapping token not the same");
410 /* else it's already defined and changed */
412 xyyerror
("Redefinition of %s", code
);
417 add_token
(type
, tok
->name
, tok
->token
, tok
->codepage
, tok
->alias
, 1);
/*
 * Create the per-language record for one message text.
 * Language and codepage are taken from lcp; len is the length of msg counted
 * with unistrlen() plus one for the terminator. A warning is available for
 * texts that are exceptionally long.
 */
422 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
)
424 lanmsg_t
*lmp
= (lanmsg_t
*)xmalloc
(sizeof
(lanmsg_t
));
425 lmp
->lan
= lcp
->language
;
426 lmp
->cp
= lcp
->codepage
;
428 lmp
->len
= unistrlen
(msg
) + 1; /* Include termination */
430 yywarning
("Message exceptionally long; might be a missing termination");
/*
 * Add one per-language text to a message.
 * The grammar passes NULL as msg for the first body of a message; a msg_t is
 * allocated in this function. The msgs array grows by one slot for lanmsg,
 * and a text whose language equals that of an earlier entry is an error.
 */
434 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
)
438 msg
= xmalloc
(sizeof
(msg_t
));
439 msg
->msgs
= xrealloc
(msg
->msgs
, (msg
->nmsgs
+1) * sizeof
(*(msg
->msgs
)));
440 msg
->msgs
[msg
->nmsgs
] = lanmsg
;
/* Compare against all entries except the last one */
442 for
(i
= 0; i
< msg
->nmsgs
-1; i
++)
444 if
(msg
->msgs
[i
]->lan
== lanmsg
->lan
)
445 xyyerror
("Message for language 0x%x already defined", lanmsg
->lan
);
/*
 * qsort() comparator for an array of lanmsg_t pointers: orders by ascending
 * `lan`, computed as the difference of the two values.
 */
450 static int sort_lanmsg
(const void *p1
, const void *p2
)
452 return
(*(lanmsg_t
**)p1
)->lan
- (*(lanmsg_t
**)p2
)->lan
;
/*
 * Finish a message once all its bodies are parsed.
 * A missing symbolic name is an error. The per-language texts are sorted by
 * language and the full id is composed as
 *   id | (last_sev << 30) | (last_fac << 16)
 * with bit 29 ORed in by a further statement.
 */
455 static msg_t
*complete_msg
(msg_t
*mp
, int id
)
462 xyyerror
("No symbolic name defined for message id %d", id
);
465 qsort
(mp
->msgs
, mp
->nmsgs
, sizeof
(*(mp
->msgs
)), sort_lanmsg
);
466 mp
->realid
= id |
(last_sev
<< 30) |
(last_fac
<< 16);
468 mp
->realid |
= 1 << 29;
/*
 * Append a parsed element to the global node list.
 * A new node_t is allocated; it is either linked behind nodetail or, for
 * the first element, becomes both nodehead and nodetail.
 */
474 static void add_node
(node_e type
, void *p
)
476 node_t
*ndp
= (node_t
*)xmalloc
(sizeof
(node_t
));
482 ndp
->prev
= nodetail
;
483 nodetail
->next
= ndp
;
488 nodehead
= nodetail
= ndp
;
/*
 * Duplicate check for a new message id.
 * Walks the node list from nodehead and reports an error when a message
 * with the same id, the current last_sev and the current last_fac exists.
 */
492 static void test_id
(int id
)
495 for
(ndp
= nodehead
; ndp
; ndp
= ndp
->next
)
497 if
(ndp
->type
!= nd_msg
)
499 if
(ndp
->u.msg
->id
== id
&& ndp
->u.msg
->sev
== last_sev
&& ndp
->u.msg
->fac
== last_fac
)
500 xyyerror
("MessageId %d with facility 0x%x and severity 0x%x already defined", id
, last_fac
, last_sev
);
/*
 * Check that the parsed messages agree on their set of languages.
 * The node list is walked looking at nd_msg nodes and their nmsgs counts are
 * compared. Per-language entries of two messages are compared index by
 * index, and a language present in one message but not in the other is
 * reported with error(), naming the message that lacks it.
 * The caller in the grammar treats a zero return as "No messages defined".
 */
504 static int check_languages
(node_t
*head
)
506 static char err_missing
[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
511 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
513 if
(ndp
->type
!= nd_msg
)
524 if
(ndp
->u.msg
->nmsgs
> msg
->nmsgs
)
535 for
(i
= 0; i
< m1
->nmsgs
; i
++)
538 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
/* m1 has a language that m2 lacks at this position */
539 else if
(m1
->msgs
[i
]->lan
< m2
->msgs
[i
]->lan
)
540 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
/* m2 has a language that m1 lacks at this position */
541 else if
(m1
->msgs
[i
]->lan
> m2
->msgs
[i
]->lan
)
542 error(err_missing
, m2
->msgs
[i
]->lan
, m1
->id
, m1
->fac
, m1
->sev
);
/* MSGRID(x): realid of the msg_t that the array element x points at */
550 #define MSGRID(x) ((*(msg_t **)(x))->realid)
/* qsort() comparator for an array of msg_t pointers: ascending realid */
551 static int sort_msg
(const void *p1
, const void *p2
)
553 return MSGRID
(p1
) > MSGRID
(p2
) ?
1 : (MSGRID
(p1
) == MSGRID
(p2
) ?
0 : -1);
554 /* Explicit three-way compare; the disabled subtraction variant that stood here referenced p1 on both sides */
558 * block_messages() basically transposes the messages
559 * from ID/language based list to a language/ID
/*
 * Build the per-language block lists from the parsed messages.
 * All nd_msg nodes are gathered in msgtab and sorted by realid. For every
 * language index nl a lan_blk_t is created and linked into the result list;
 * its blocks group messages with consecutive realid values. Each entry adds
 * ((factor * len + 3) & ~3) + 4 to the block size, where factor is 2 for
 * unicode output and 1 otherwise.
 */
562 static lan_blk_t
*block_messages
(node_t
*head
)
565 lan_blk_t
*lblktail
= NULL
;
566 lan_blk_t
*lblkhead
= NULL
;
567 msg_t
**msgtab
= NULL
;
572 int factor
= unicodeout ?
2 : 1;
/* Collect the message nodes */
574 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
576 if
(ndp
->type
!= nd_msg
)
578 msgtab
= xrealloc
(msgtab
, (nmsg
+1) * sizeof
(*msgtab
));
579 msgtab
[nmsg
++] = ndp
->u.msg
;
583 qsort
(msgtab
, nmsg
, sizeof
(*msgtab
), sort_msg
);
585 for
(nl
= 0; nl
< msgtab
[0]->nmsgs
; nl
++) /* This should be equal for all after check_languages() */
587 lbp
= xmalloc
(sizeof
(lan_blk_t
));
591 lblkhead
= lblktail
= lbp
;
595 lblktail
->next
= lbp
;
596 lbp
->prev
= lblktail
;
/* The first block starts with the lowest realid */
600 lbp
->blks
= xmalloc
(sizeof
(*lbp
->blks
));
601 lbp
->blks
[0].idlo
= msgtab
[0]->realid
;
602 lbp
->blks
[0].idhi
= msgtab
[0]->realid
;
603 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
604 lbp
->blks
[0].size
= ((factor
* msgtab
[0]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
605 lbp
->blks
[0].msgs
= xmalloc
(sizeof
(*lbp
->blks
[0].msgs
));
606 lbp
->blks
[0].nmsg
= 1;
607 lbp
->blks
[0].msgs
[0] = msgtab
[0]->msgs
[nl
];
608 lbp
->lan
= msgtab
[0]->msgs
[nl
]->lan
;
610 for
(i
= 1; i
< nmsg
; i
++)
612 block_t
*blk
= &(lbp
->blks
[lbp
->nblk
-1]);
/* Next id follows directly: extend the current block */
613 if
(msgtab
[i
]->realid
== blk
->idhi
+1)
615 blk
->size
+= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
617 blk
->msgs
= xrealloc
(blk
->msgs
, (blk
->nmsg
+1) * sizeof
(*blk
->msgs
));
618 blk
->msgs
[blk
->nmsg
++] = msgtab
[i
]->msgs
[nl
];
/* Otherwise a new block is set up for this id */
623 lbp
->blks
= xrealloc
(lbp
->blks
, lbp
->nblk
* sizeof
(*lbp
->blks
));
624 blk
= &(lbp
->blks
[lbp
->nblk
-1]);
625 blk
->idlo
= msgtab
[i
]->realid
;
626 blk
->idhi
= msgtab
[i
]->realid
;
627 blk
->size
= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
628 blk
->msgs
= xmalloc
(sizeof
(*blk
->msgs
));
630 blk
->msgs
[0] = msgtab
[i
]->msgs
[nl
];
/*
 * Comparator for cp_xlat_t elements: orders by ascending `lan`, computed as
 * the difference of the two values. Used for both qsort() and bsearch() on
 * cpxlattab.
 */
638 static int sc_xlat
(const void *p1
, const void *p2
)
640 return
((cp_xlat_t
*)p1
)->lan
- ((cp_xlat_t
*)p2
)->lan
;
/*
 * Record a codepage translation for a language.
 * cpxlattab grows by one entry holding lan, cpin and cpout, and the table is
 * sorted again with sc_xlat so that find_cpxlat() can use bsearch().
 */
643 static void add_cpxlat
(int lan
, int cpin
, int cpout
)
645 cpxlattab
= xrealloc
(cpxlattab
, (ncpxlattab
+1) * sizeof
(*cpxlattab
));
646 cpxlattab
[ncpxlattab
].lan
= lan
;
647 cpxlattab
[ncpxlattab
].cpin
= cpin
;
648 cpxlattab
[ncpxlattab
].cpout
= cpout
;
650 qsort
(cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);
/*
 * Look up the codepage translation for a language.
 * Returns NULL when no table exists yet; otherwise the result of a bsearch()
 * over cpxlattab with the sc_xlat comparator and the key `t`.
 */
653 static cp_xlat_t
*find_cpxlat
(int lan
)
657 if
(!cpxlattab
) return NULL
;
660 return
(cp_xlat_t
*)bsearch
(&t
, cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);