2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * The basic grammar of the file is yet another example of, humpf,
23 * design. There is a mix of context-insensitive and -sensitive
24 * stuff, which makes it rather complicated.
25 * The header definitions are all context-insensitive because they have
26 * delimited arguments, whereas the message headers are (semi-) context-
27 * sensitive and the messages themselves are, well, RFC82[12] delimited.
28 * This mixture seems to originate from the time that ms and ibm were
29 * good friends and developing os/2 according to the "compatibility"
30 * switch and reading some comments here and there.
32 * I'll ignore most of the complications and concentrate on the concept
33 * which allows me to use yacc. Basically, everything is context-
34 * insensitive now, with the exception of the message-text itself and
35 * the preceding language declaration.
51 static const char err_syntax
[] = "Syntax error";
52 static const char err_number
[] = "Number expected";
53 static const char err_ident
[] = "Identifier expected";
54 static const char err_assign
[] = "'=' expected";
55 static const char err_popen
[] = "'(' expected";
56 static const char err_pclose
[] = "')' expected";
57 static const char err_colon
[] = "':' expected";
58 static const char err_msg
[] = "Message expected";
60 /* Scanner switches */
61 int want_nl
= 0; /* Request next newline */
62 int want_line
= 0; /* Request next complete line */
63 int want_file
= 0; /* Request next ident as filename */
65 node_t
*nodehead
= NULL
; /* The list of all parsed elements */
66 static node_t
*nodetail
= NULL
;
67 lan_blk_t
*lanblockhead
; /* List of parsed elements transposed */
69 static int base
= 16; /* Current printout base to use (8, 10 or 16) */
70 static WCHAR
*cast
= NULL
; /* Current typecast to use */
72 static int last_id
= 0; /* The last message ID parsed */
73 static int last_sev
= 0; /* Last severity code parsed */
74 static int last_fac
= 0; /* Last facility code parsed */
75 static WCHAR
*last_sym
= NULL
;/* Last alias symbol parsed */
76 static int have_sev
; /* Set if severity parsed for current message */
77 static int have_fac
; /* Set if facility parsed for current message */
78 static int have_sym
; /* Set if symbol parsed for current message */
80 static cp_xlat_t
*cpxlattab
= NULL
; /* Codepage translation table */
81 static int ncpxlattab
= 0;
84 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
);
85 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
);
86 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
);
87 static msg_t
*complete_msg
(msg_t
*msg
, int id
);
88 static void add_node
(node_e type
, void *p
);
89 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
);
90 static void test_id
(int id
);
91 static int check_languages
(node_t
*head
);
92 static lan_blk_t
*block_messages
(node_t
*head
);
93 static void add_cpxlat
(int lan
, int cpin
, int cpout
);
94 static cp_xlat_t
*find_cpxlat
(int lan
);
108 %token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
109 %token tTYPEDEF tNL tSYMNAME tMSGEND
110 %token tSEVERITY tFACILITY tLANGUAGE tMSGID
111 %token
<str
> tIDENT tLINE tFILE tCOMMENT
115 %type
<str
> alias lines
116 %type
<num
> optcp id msgid clan
119 %type
<msg
> bodies msg
124 if
(!check_languages
(nodehead
))
125 xyyerror
("No messages defined");
126 lanblockhead
= block_messages
(nodehead
);
135 | msg
{ add_node
(nd_msg
, $1); }
136 | tCOMMENT
{ add_node
(nd_comment
, $1); }
137 |
error { xyyerror
(err_syntax
); /* `Catch all' error */ }
140 global
: tSEVNAMES
'=' '(' smaps
')'
141 | tSEVNAMES
'=' '(' smaps
error { xyyerror
(err_pclose
); }
142 | tSEVNAMES
'=' error { xyyerror
(err_popen
); }
143 | tSEVNAMES
error { xyyerror
(err_assign
); }
144 | tFACNAMES
'=' '(' fmaps
')'
145 | tFACNAMES
'=' '(' fmaps
error { xyyerror
(err_pclose
); }
146 | tFACNAMES
'=' error { xyyerror
(err_popen
); }
147 | tFACNAMES
error { xyyerror
(err_assign
); }
148 | tLANNAMES
'=' '(' lmaps
')'
149 | tLANNAMES
'=' '(' lmaps
error { xyyerror
(err_pclose
); }
150 | tLANNAMES
'=' error { xyyerror
(err_popen
); }
151 | tLANNAMES
error { xyyerror
(err_assign
); }
152 | tCODEPAGE
'=' '(' cmaps
')'
153 | tCODEPAGE
'=' '(' cmaps
error { xyyerror
(err_pclose
); }
154 | tCODEPAGE
'=' error { xyyerror
(err_popen
); }
155 | tCODEPAGE
error { xyyerror
(err_assign
); }
156 | tTYPEDEF
'=' tIDENT
{ cast
= $3; }
157 | tTYPEDEF
'=' error { xyyerror
(err_ident
); /* the valid form is tTYPEDEF '=' tIDENT, so an identifier (not a number) is expected */ }
158 | tTYPEDEF
error { xyyerror
(err_assign
); }
159 | tBASE
'=' tNUMBER
{
168 xyyerror
("Numberbase must be 8, 10 or 16");
171 | tBASE
'=' error { xyyerror
(err_number
); }
172 | tBASE
error { xyyerror
(err_assign
); }
175 /*----------------------------------------------------------------------
176 * SeverityNames mapping
180 |
error { xyyerror
(err_ident
); }
183 smap
: token
'=' tNUMBER alias
{
187 xyyerror
("Severity value out of range (0x%08x > 0x3)", $3);
188 do_add_token
(tok_severity
, $1, "severity");
190 | token
'=' error { xyyerror
(err_number
); }
191 | token
error { xyyerror
(err_assign
); }
194 /*----------------------------------------------------------------------
195 * FacilityNames mapping
199 |
error { xyyerror
(err_ident
); }
202 fmap
: token
'=' tNUMBER alias
{
206 xyyerror
("Facility value out of range (0x%08x > 0xfff)", $3);
207 do_add_token
(tok_facility
, $1, "facility");
209 | token
'=' error { xyyerror
(err_number
); }
210 | token
error { xyyerror
(err_assign
); }
213 alias
: /* Empty */ { $$
= NULL
; }
214 |
':' tIDENT
{ $$
= $2; }
215 |
':' error { xyyerror
(err_ident
); }
218 /*----------------------------------------------------------------------
219 * LanguageNames mapping
223 |
error { xyyerror
(err_ident
); }
226 lmap
: token
'=' tNUMBER setfile
':' tFILE optcp
{
230 do_add_token
(tok_language
, $1, "language");
231 if
(!find_language
($3) && !find_cpxlat
($3))
232 mcy_warning
("Language 0x%x not built-in, using codepage %d; use explicit codepage to override", $3, WMC_DEFAULT_CODEPAGE
);
234 | token
'=' tNUMBER setfile
':' error { xyyerror
("Filename expected"); }
235 | token
'=' tNUMBER
error { xyyerror
(err_colon
); }
236 | token
'=' error { xyyerror
(err_number
); }
237 | token
error { xyyerror
(err_assign
); }
240 optcp
: /* Empty */ { $$
= 0; }
241 |
':' tNUMBER
{ $$
= $2; }
242 |
':' error { xyyerror
("Codepage-number expected"); }
245 /*----------------------------------------------------------------------
250 |
error { xyyerror
(err_ident
); }
253 cmap
: clan
'=' tNUMBER
':' tNUMBER
{
254 static const char err_nocp
[] = "Codepage %d not builtin; cannot convert";
256 xyyerror
("Codepage translation already defined for language 0x%x", $1);
257 if
($3 && !find_codepage
($3))
258 xyyerror
(err_nocp
, $3);
259 if
($5 && !find_codepage
($5))
260 xyyerror
(err_nocp
, $5);
261 add_cpxlat
($1, $3, $5);
263 | clan
'=' tNUMBER
':' error { xyyerror
(err_number
); }
264 | clan
'=' tNUMBER
error { xyyerror
(err_colon
); }
265 | clan
'=' error { xyyerror
(err_number
); }
266 | clan
error { xyyerror
(err_assign
); }
269 clan
: tNUMBER
{ $$
= $1; }
271 if
($1->type
!= tok_language
)
272 xyyerror
("Language name or code expected");
277 /*----------------------------------------------------------------------
278 * Message-definition parsing
280 msg
: msgid sevfacsym
{ test_id
($1); } bodies
{ $$
= complete_msg
($4, $1); }
283 msgid
: tMSGID
'=' id
{
285 xyyerror
("Message ID value out of range (0x%08x > 0xffff)", $3);
288 | tMSGID
error { xyyerror
(err_assign
); }
291 id
: /* Empty */ { $$
= ++last_id
; }
292 | tNUMBER
{ $$
= last_id
= $1; }
293 |
'+' tNUMBER
{ $$
= last_id
+= $2; }
294 |
'+' error { xyyerror
(err_number
); }
297 sevfacsym: /* Empty */ { have_sev
= have_fac
= have_sym
= 0; }
298 | sevfacsym sev
{ if
(have_sev
) xyyerror
("Severity already defined"); have_sev
= 1; }
299 | sevfacsym fac
{ if
(have_fac
) xyyerror
("Facility already defined"); have_fac
= 1; }
300 | sevfacsym sym
{ if
(have_sym
) xyyerror
("Symbolname already defined"); have_sym
= 1; }
303 sym
: tSYMNAME
'=' tIDENT
{ last_sym
= $3; }
304 | tSYMNAME
'=' error { xyyerror
(err_ident
); }
305 | tSYMNAME
error { xyyerror
(err_assign
); }
308 sev
: tSEVERITY
'=' token
{
309 token_t
*tok
= lookup_token
($3->name
);
311 xyyerror
("Undefined severityname");
312 if
(tok
->type
!= tok_severity
)
313 xyyerror
("Identifier is not of class 'severity'");
314 last_sev
= tok
->token
;
316 | tSEVERITY
'=' error { xyyerror
(err_ident
); }
317 | tSEVERITY
error { xyyerror
(err_assign
); }
320 fac
: tFACILITY
'=' token
{
321 token_t
*tok
= lookup_token
($3->name
);
323 xyyerror
("Undefined facilityname");
324 if
(tok
->type
!= tok_facility
)
325 xyyerror
("Identifier is not of class 'facility'");
326 last_fac
= tok
->token
;
328 | tFACILITY
'=' error { xyyerror
(err_ident
); }
329 | tFACILITY
error { xyyerror
(err_assign
); }
332 /*----------------------------------------------------------------------
333 * Message-text parsing
335 bodies
: body
{ $$
= add_lanmsg
(NULL
, $1); }
336 | bodies body
{ $$
= add_lanmsg
($1, $2); }
337 |
error { xyyerror
("'Language=...' (start of message text-definition) expected"); }
340 body
: lang setline lines tMSGEND
{ $$
= new_lanmsg
(&$1, $3); }
344 * The newline is to be able to set the codepage
345 * to the language based codepage for the next
346 * message to be parsed.
348 lang
: tLANGUAGE setnl
'=' token tNL
{
349 token_t
*tok
= lookup_token
($4->name
);
352 xyyerror
("Undefined language");
353 if
(tok
->type
!= tok_language
)
354 xyyerror
("Identifier is not of class 'language'");
355 if
((cpx
= find_cpxlat
(tok
->token
)))
357 set_codepage
($$.codepage
= cpx
->cpin
);
359 else if
(!tok
->codepage
)
361 const language_t
*lan
= find_language
(tok
->token
);
364 /* Just set default; warning was given while parsing languagenames */
365 set_codepage
($$.codepage
= WMC_DEFAULT_CODEPAGE
);
369 /* The default seems to be to use the DOS codepage... */
370 set_codepage
($$.codepage
= lan
->doscp
);
374 set_codepage
($$.codepage
= tok
->codepage
);
375 $$.language
= tok
->token
;
377 | tLANGUAGE setnl
'=' token
error { xyyerror
("Missing newline"); }
378 | tLANGUAGE setnl
'=' error { xyyerror
(err_ident
); }
379 | tLANGUAGE
error { xyyerror
(err_assign
); }
382 lines
: tLINE
{ $$
= $1; }
383 | lines tLINE
{ $$
= merge
($1, $2); }
384 |
error { xyyerror
(err_msg
); }
385 | lines
error { xyyerror
(err_msg
); }
388 /*----------------------------------------------------------------------
391 token
: tIDENT
{ $$
= xmalloc
(sizeof
(token_t
)); memset
($$
,0,sizeof
(*$$
)); $$
->name
= $1; }
392 | tTOKEN
{ $$
= $1; }
395 setnl
: /* Empty */ { want_nl
= 1; }
398 setline
: /* Empty */ { want_line
= 1; }
401 setfile
: /* Empty */ { want_file
= 1; }
/*
 * merge: append the wide string s2 to the wide string s1.
 *
 * s1 is resized with xrealloc() to hold the elements of both strings
 * plus one extra element (presumably the terminator), and s2 is then
 * copied in directly after the existing contents of s1.
 * Used by the `lines' rule to accumulate consecutive tLINE values.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the (possibly moved) s1 is returned - confirm.
 */
406 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
)
408 int l1
= unistrlen
(s1
);
409 int l2
= unistrlen
(s2
);
410 s1
= xrealloc
(s1
, (l1
+ l2
+ 1) * sizeof
(*s1
));
/* Copy s2 starting at the old end of s1 */
411 unistrcpy
(s1
+l1
, s2
);
/*
 * do_add_token: register a parsed token under the given token class.
 *
 *   type - class the token is being declared as
 *   tok  - the parsed token (name, value, codepage, alias)
 *   code - human readable class name, used in the redefinition error
 *
 * The name is first looked up with lookup_token(). The diagnostics
 * visible here report a type change (warning), an overlapping but
 * different token (error) and a redefinition (error); otherwise the
 * token is handed to add_token().
 *
 * NOTE(review): several of the conditions guarding these diagnostics
 * are not visible in this listing - confirm against the full source.
 */
416 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
)
418 token_t
*tp
= lookup_token
(tok
->name
);
421 if
(tok
->type
!= type
)
422 mcy_warning
("Type change in token");
424 xyyerror
("Overlapping token not the same");
425 /* else it's already defined and changed */
427 xyyerror
("Redefinition of %s", code
);
432 add_token
(type
, tok
->name
, tok
->token
, tok
->codepage
, tok
->alias
, 1);
/*
 * new_lanmsg: allocate a language-specific message record.
 *
 *   lcp - language/codepage pair the message text belongs to
 *   msg - the message text
 *
 * The new record takes its language and codepage from lcp and its
 * length from unistrlen(msg) plus one for the termination.
 *
 * NOTE(review): the assignment of the text itself, the length test
 * guarding the warning and the return statement are not visible in
 * this listing - confirm against the full source.
 */
437 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
)
439 lanmsg_t
*lmp
= xmalloc
(sizeof
(lanmsg_t
));
440 lmp
->lan
= lcp
->language
;
441 lmp
->cp
= lcp
->codepage
;
443 lmp
->len
= unistrlen
(msg
) + 1; /* Include termination */
445 mcy_warning
("Message exceptionally long; might be a missing termination");
/*
 * add_lanmsg: append a language-specific message to a message entry.
 *
 *   msg    - entry to extend; the `bodies' rule passes NULL for the
 *            first body of a message
 *   lanmsg - the language-specific message to append
 *
 * A zero-initialised msg_t is allocated here (presumably only when msg
 * is NULL - the guard is not visible in this listing). The msgs array
 * is grown by one slot, lanmsg is stored in it, and the earlier
 * entries are scanned so that a second message for the same language
 * is reported as an error.
 *
 * NOTE(review): the nmsgs increment and the return statement are not
 * visible in this listing; the loop bound nmsgs-1 suggests the counter
 * is bumped before the duplicate scan - confirm.
 */
449 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
)
454 msg
= xmalloc
(sizeof
(msg_t
));
455 memset
( msg
, 0, sizeof
(*msg
) );
/* Make room for one more language entry and store it in the new slot */
457 msg
->msgs
= xrealloc
(msg
->msgs
, (msg
->nmsgs
+1) * sizeof
(*(msg
->msgs
)));
458 msg
->msgs
[msg
->nmsgs
] = lanmsg
;
/* Reject a language that already has a text in this message */
460 for
(i
= 0; i
< msg
->nmsgs
-1; i
++)
462 if
(msg
->msgs
[i
]->lan
== lanmsg
->lan
)
463 xyyerror
("Message for language 0x%x already defined", lanmsg
->lan
);
/*
 * sort_lanmsg: qsort() comparator for arrays of lanmsg_t pointers.
 *
 * p1 and p2 point at array elements (each a lanmsg_t *); the result is
 * the difference of the two lan members, giving ascending order by
 * language.
 *
 * NOTE(review): a subtraction-based compare relies on the lan values
 * being small enough that the difference cannot overflow - confirm.
 */
468 static int sort_lanmsg
(const void *p1
, const void *p2
)
470 return
(*(const lanmsg_t
* const *)p1
)->lan
- (*(const lanmsg_t
* const*)p2
)->lan
;
/*
 * complete_msg: finish a message entry once all its bodies are parsed.
 *
 *   mp - the message entry built up by add_lanmsg()
 *   id - the message ID parsed for this message
 *
 * The language-specific texts are sorted by language (sort_lanmsg) and
 * the full ID is composed from id, the last parsed severity (shifted
 * to bit 30) and the last parsed facility (shifted to bit 16).
 * Bit 29 is or-ed in on a path whose condition is not visible here.
 *
 * NOTE(review): the symbol-name check that triggers the error, the
 * remaining field assignments and the return statement are not visible
 * in this listing - confirm against the full source.
 */
473 static msg_t
*complete_msg
(msg_t
*mp
, int id
)
480 xyyerror
("No symbolic name defined for message id %d", id
);
483 qsort
(mp
->msgs
, mp
->nmsgs
, sizeof
(*(mp
->msgs
)), sort_lanmsg
);
/* Compose the full ID: severity from bit 30, facility from bit 16, id in the low bits */
484 mp
->realid
= id |
(last_sev
<< 30) |
(last_fac
<< 16);
486 mp
->realid |
= 1 << 29;
/*
 * add_node: append a parsed element to the global node list.
 *
 *   type - kind of element (the grammar passes nd_msg or nd_comment)
 *   p    - the element itself
 *
 * A zero-initialised node is allocated and either linked behind the
 * current tail (nodetail) or, on the other path, installed as both
 * nodehead and nodetail.
 *
 * NOTE(review): the storing of type/p in the node, the test selecting
 * between the two paths and the tail update on the append path are not
 * visible in this listing - confirm against the full source.
 */
492 static void add_node
(node_e type
, void *p
)
494 node_t
*ndp
= xmalloc
(sizeof
(node_t
));
495 memset
( ndp
, 0, sizeof
(*ndp
) );
/* Append path: hook the new node behind the current tail */
501 ndp
->prev
= nodetail
;
502 nodetail
->next
= ndp
;
/* Other path: the new node becomes both head and tail */
507 nodehead
= nodetail
= ndp
;
/*
 * test_id: reject a duplicate message definition.
 *
 *   id - the message ID just parsed
 *
 * Walks the node list from nodehead and reports an error when an
 * already stored message has the same id together with the same
 * severity and facility as the ones last parsed (last_sev, last_fac).
 *
 * NOTE(review): the statement taken for non-message nodes is not
 * visible in this listing; presumably they are skipped - confirm.
 */
511 static void test_id
(int id
)
514 for
(ndp
= nodehead
; ndp
; ndp
= ndp
->next
)
516 if
(ndp
->type
!= nd_msg
)
518 if
(ndp
->u.msg
->id
== id
&& ndp
->u.msg
->sev
== last_sev
&& ndp
->u.msg
->fac
== last_fac
)
519 xyyerror
("MessageId %d with facility 0x%x and severity 0x%x already defined", id
, last_fac
, last_sev
);
523 static int check_languages
(node_t
*head
)
525 static const char err_missing
[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
530 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
532 if
(ndp
->type
!= nd_msg
)
543 if
(ndp
->u.msg
->nmsgs
> msg
->nmsgs
)
554 for
(i
= 0; i
< m1
->nmsgs
; i
++)
557 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
558 else if
(m1
->msgs
[i
]->lan
< m2
->msgs
[i
]->lan
)
559 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
560 else if
(m1
->msgs
[i
]->lan
> m2
->msgs
[i
]->lan
)
561 error(err_missing
, m2
->msgs
[i
]->lan
, m1
->id
, m1
->fac
, m1
->sev
);
/*
 * MSGRID(x): given a pointer to an array element of type msg_t *,
 * yield the realid of the message it points to.
 *
 * sort_msg: qsort() comparator for arrays of msg_t pointers.
 * Returns 1, 0 or -1 for a realid that is greater than, equal to or
 * less than the other one, i.e. ascending order by realid. Explicit
 * comparisons are used; the subtraction-based form is left disabled
 * in the comment at the end.
 */
569 #define MSGRID(x) ((*(const msg_t * const*)(x))->realid)
570 static int sort_msg
(const void *p1
, const void *p2
)
572 return MSGRID
(p1
) > MSGRID
(p2
) ?
1 : (MSGRID
(p1
) == MSGRID
(p2
) ?
0 : -1);
573 /* return (*(msg_t **)p1)->realid - (*(msg_t **)p1)->realid; */
577 * block_messages() basically transposes the messages
578 * from ID/language based list to a language/ID
581 static lan_blk_t
*block_messages
(node_t
*head
)
584 lan_blk_t
*lblktail
= NULL
;
585 lan_blk_t
*lblkhead
= NULL
;
586 msg_t
**msgtab
= NULL
;
591 int factor
= unicodeout ?
2 : 1;
593 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
595 if
(ndp
->type
!= nd_msg
)
597 msgtab
= xrealloc
(msgtab
, (nmsg
+1) * sizeof
(*msgtab
));
598 msgtab
[nmsg
++] = ndp
->u.msg
;
602 qsort
(msgtab
, nmsg
, sizeof
(*msgtab
), sort_msg
);
604 for
(nl
= 0; nl
< msgtab
[0]->nmsgs
; nl
++) /* This should be equal for all after check_languages() */
606 lbp
= xmalloc
(sizeof
(lan_blk_t
));
607 memset
( lbp
, 0, sizeof
(*lbp
) );
610 lblkhead
= lblktail
= lbp
;
614 lblktail
->next
= lbp
;
615 lbp
->prev
= lblktail
;
619 lbp
->blks
= xmalloc
(sizeof
(*lbp
->blks
));
620 lbp
->blks
[0].idlo
= msgtab
[0]->realid
;
621 lbp
->blks
[0].idhi
= msgtab
[0]->realid
;
622 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
623 lbp
->blks
[0].size
= ((factor
* msgtab
[0]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
624 lbp
->blks
[0].msgs
= xmalloc
(sizeof
(*lbp
->blks
[0].msgs
));
625 lbp
->blks
[0].nmsg
= 1;
626 lbp
->blks
[0].msgs
[0] = msgtab
[0]->msgs
[nl
];
627 lbp
->lan
= msgtab
[0]->msgs
[nl
]->lan
;
629 for
(i
= 1; i
< nmsg
; i
++)
631 block_t
*blk
= &(lbp
->blks
[lbp
->nblk
-1]);
632 if
(msgtab
[i
]->realid
== blk
->idhi
+1)
634 blk
->size
+= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
636 blk
->msgs
= xrealloc
(blk
->msgs
, (blk
->nmsg
+1) * sizeof
(*blk
->msgs
));
637 blk
->msgs
[blk
->nmsg
++] = msgtab
[i
]->msgs
[nl
];
642 lbp
->blks
= xrealloc
(lbp
->blks
, lbp
->nblk
* sizeof
(*lbp
->blks
));
643 blk
= &(lbp
->blks
[lbp
->nblk
-1]);
644 blk
->idlo
= msgtab
[i
]->realid
;
645 blk
->idhi
= msgtab
[i
]->realid
;
646 blk
->size
= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
647 blk
->msgs
= xmalloc
(sizeof
(*blk
->msgs
));
649 blk
->msgs
[0] = msgtab
[i
]->msgs
[nl
];
/*
 * sc_xlat: comparator for codepage translation entries.
 *
 * p1 and p2 point directly at cp_xlat_t entries; the result is the
 * difference of their lan members, giving ascending order by language.
 * The same function is used for qsort() in add_cpxlat() and for
 * bsearch() in find_cpxlat(), so both agree on the ordering.
 *
 * NOTE(review): a subtraction-based compare relies on the lan values
 * being small enough that the difference cannot overflow - confirm.
 */
657 static int sc_xlat
(const void *p1
, const void *p2
)
659 return
((const cp_xlat_t
*)p1
)->lan
- ((const cp_xlat_t
*)p2
)->lan
;
/*
 * add_cpxlat: add an entry to the codepage translation table.
 *
 *   lan   - language the translation applies to
 *   cpin  - input codepage
 *   cpout - output codepage
 *
 * The table is grown by one element, the new element is filled in and
 * the table is sorted again by language (sc_xlat), which is the order
 * the bsearch() in find_cpxlat() depends on.
 *
 * NOTE(review): the ncpxlattab increment is not visible in this
 * listing; presumably it happens before the qsort() call - confirm.
 */
662 static void add_cpxlat
(int lan
, int cpin
, int cpout
)
664 cpxlattab
= xrealloc
(cpxlattab
, (ncpxlattab
+1) * sizeof
(*cpxlattab
));
665 cpxlattab
[ncpxlattab
].lan
= lan
;
666 cpxlattab
[ncpxlattab
].cpin
= cpin
;
667 cpxlattab
[ncpxlattab
].cpout
= cpout
;
/* Keep the table ordered by language for the binary search */
669 qsort
(cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);
/*
 * find_cpxlat: look up the codepage translation for a language.
 *
 *   lan - language to search for
 *
 * Returns NULL when no translation table has been built yet; otherwise
 * returns the result of a bsearch() over the table using sc_xlat,
 * which is NULL when nothing matches.
 *
 * NOTE(review): the declaration and setup of the search key `t' are
 * not visible in this listing; presumably it is a cp_xlat_t whose lan
 * member is set to the requested language - confirm.
 */
672 static cp_xlat_t
*find_cpxlat
(int lan
)
676 if
(!cpxlattab
) return NULL
;
679 return
(cp_xlat_t
*)bsearch
(&t
, cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);