2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * The basic grammar of the file is yet another example of, humpf,
23 * design. There is a mix of context-insensitive and -sensitive
24 * stuff, which makes it rather complicated.
25 * The header definitions are all context-insensitive because they have
26 * delimited arguments, whereas the message headers are (semi-) context-
27 * sensitive and the messages themselves are, well, RFC82[12] delimited.
28 * This mixture seems to originate from the time that ms and ibm were
29 * good friends and developing os/2 according to the "compatibility"
30 * switch and reading some comments here and there.
32 * I'll ignore most of the complications and concentrate on the concept
33 * which allows me to use yacc. Basically, everything is context-
34 * insensitive now, with the exception of the message-text itself and
35 * the preceding language declaration.
/* Diagnostic texts handed to xyyerror() by the grammar's error alternatives */
51 static const char err_syntax
[] = "Syntax error";
52 static const char err_number
[] = "Number expected";
53 static const char err_ident
[] = "Identifier expected";
54 static const char err_assign
[] = "'=' expected";
55 static const char err_popen
[] = "'(' expected";
56 static const char err_pclose
[] = "')' expected";
57 static const char err_colon
[] = "':' expected";
58 static const char err_msg
[] = "Message expected";
60 /* Scanner switches */
61 int want_nl
= 0; /* Request next newline; raised by the 'setnl' grammar rule */
62 int want_line
= 0; /* Request next complete line; raised by the 'setline' grammar rule */
63 int want_file
= 0; /* Request next ident as filename; raised by the 'setfile' grammar rule */
65 node_t
*nodehead
= NULL
; /* The list of all parsed elements */
66 static node_t
*nodetail
= NULL
; /* Last element of the nodehead list; add_node() links new nodes behind it */
67 lan_blk_t
*lanblockhead
; /* List of parsed elements transposed; assigned from block_messages() */
69 static int base
= 16; /* Current printout base to use (8, 10 or 16) */
70 static WCHAR
*cast
= NULL
; /* Current typecast to use */
72 static int last_id
= 0; /* The last message ID parsed */
73 static int last_sev
= 0; /* Last severity code parsed */
74 static int last_fac
= 0; /* Last facility code parsed */
75 static WCHAR
*last_sym
= NULL
;/* Last alias symbol parsed */
76 static int have_sev
; /* Set if severity parsed for current message */
77 static int have_fac
; /* Set if facility parsed for current message */
78 static int have_sym
; /* Set if symbol parsed for current message */
80 static cp_xlat_t
*cpxlattab
= NULL
; /* Codepage translation table */
81 static int ncpxlattab
= 0; /* Element count used when sorting and searching cpxlattab */
/* Forward declarations of the helper functions defined after the grammar rules */
84 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
);
85 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
);
86 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
);
87 static msg_t
*complete_msg
(msg_t
*msg
, int id
);
88 static void add_node
(node_e type
, void *p
);
89 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
);
90 static void test_id
(int id
);
91 static int check_languages
(node_t
*head
);
92 static lan_blk_t
*block_messages
(node_t
*head
);
93 static void add_cpxlat
(int lan
, int cpin
, int cpout
);
94 static cp_xlat_t
*find_cpxlat
(int lan
);
/* Terminal and nonterminal declarations; <str>, <num> and <msg> name the
 * semantic-value member that the listed symbols use. */
109 %token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
110 %token tTYPEDEF tNL tSYMNAME tMSGEND
111 %token tSEVERITY tFACILITY tLANGUAGE tMSGID
112 %token
<str
> tIDENT tLINE tFILE tCOMMENT
116 %type
<str
> alias lines
117 %type
<num
> optcp id msgid clan
120 %type
<msg
> bodies msg
/* Action fragment (the enclosing rule head is not visible here): reports
 * "No messages defined" when check_languages() returns zero, then stores the
 * result of block_messages() in lanblockhead. */
125 if
(!check_languages
(nodehead
))
126 xyyerror
("No messages defined");
127 lanblockhead
= block_messages
(nodehead
);
/* Alternatives of a rule whose head is not visible here: parsed messages and
 * comments are appended to the node list through add_node(); anything else
 * ends up in the catch-all syntax error. */
136 | msg
{ add_node
(nd_msg
, $1); }
137 | tCOMMENT
{ add_node
(nd_comment
, $1); }
138 |
error { xyyerror
(err_syntax
); /* `Catch all' error */ }
/* Header definitions introduced by tSEVNAMES, tFACNAMES, tLANNAMES,
 * tCODEPAGE, tTYPEDEF and tBASE. The first four take a parenthesised map
 * list; every 'error' alternative reports the token expected at that point. */
141 global
: tSEVNAMES
'=' '(' smaps
')'
142 | tSEVNAMES
'=' '(' smaps
error { xyyerror
(err_pclose
); }
143 | tSEVNAMES
'=' error { xyyerror
(err_popen
); }
144 | tSEVNAMES
error { xyyerror
(err_assign
); }
145 | tFACNAMES
'=' '(' fmaps
')'
146 | tFACNAMES
'=' '(' fmaps
error { xyyerror
(err_pclose
); }
147 | tFACNAMES
'=' error { xyyerror
(err_popen
); }
148 | tFACNAMES
error { xyyerror
(err_assign
); }
149 | tLANNAMES
'=' '(' lmaps
')'
150 | tLANNAMES
'=' '(' lmaps
error { xyyerror
(err_pclose
); }
151 | tLANNAMES
'=' error { xyyerror
(err_popen
); }
152 | tLANNAMES
error { xyyerror
(err_assign
); }
153 | tCODEPAGE
'=' '(' cmaps
')'
154 | tCODEPAGE
'=' '(' cmaps
error { xyyerror
(err_pclose
); }
155 | tCODEPAGE
'=' error { xyyerror
(err_popen
); }
156 | tCODEPAGE
error { xyyerror
(err_assign
); }
/* The typedef name is kept in 'cast' */
157 | tTYPEDEF
'=' tIDENT
{ cast
= $3; }
/* NOTE(review): the valid alternative above expects tIDENT, yet this one
 * reports err_number ("Number expected"); err_ident looks intended - confirm. */
158 | tTYPEDEF
'=' error { xyyerror
(err_number
); }
159 | tTYPEDEF
error { xyyerror
(err_assign
); }
160 | tBASE
'=' tNUMBER
{
169 xyyerror
("Numberbase must be 8, 10 or 16");
172 | tBASE
'=' error { xyyerror
(err_number
); }
173 | tBASE
error { xyyerror
(err_assign
); }
176 /*----------------------------------------------------------------------
177 * SeverityNames mapping
/* Error alternative of the preceding list rule (its head is not visible here) */
181 |
error { xyyerror
(err_ident
); }
/* One severity definition: token '=' number with an optional alias. The token
 * is registered through do_add_token() as tok_severity; per the diagnostic
 * text, values above 0x3 are out of range (the guarding test is not visible
 * here). */
184 smap
: token
'=' tNUMBER alias
{
188 xyyerror
("Severity value out of range (0x%08x > 0x3)", $3);
189 do_add_token
(tok_severity
, $1, "severity");
191 | token
'=' error { xyyerror
(err_number
); }
192 | token
error { xyyerror
(err_assign
); }
195 /*----------------------------------------------------------------------
196 * FacilityNames mapping
/* Error alternative of the preceding list rule (its head is not visible here) */
200 |
error { xyyerror
(err_ident
); }
/* One facility definition: token '=' number with an optional alias. The token
 * is registered through do_add_token() as tok_facility; per the diagnostic
 * text, values above 0xfff are out of range (the guarding test is not visible
 * here). */
203 fmap
: token
'=' tNUMBER alias
{
207 xyyerror
("Facility value out of range (0x%08x > 0xfff)", $3);
208 do_add_token
(tok_facility
, $1, "facility");
210 | token
'=' error { xyyerror
(err_number
); }
211 | token
error { xyyerror
(err_assign
); }
/* Optional ':' identifier following a severity/facility value; yields NULL
 * when no alias is given. */
214 alias
: /* Empty */ { $$
= NULL
; }
215 |
':' tIDENT
{ $$
= $2; }
216 |
':' error { xyyerror
(err_ident
); }
219 /*----------------------------------------------------------------------
220 * LanguageNames mapping
/* Error alternative of the preceding list rule (its head is not visible here) */
224 |
error { xyyerror
(err_ident
); }
/* One language definition: token '=' number ':' filename, optionally followed
 * by a codepage (optcp). The empty 'setfile' rule raises want_file ahead of
 * the filename. The token is registered through do_add_token() as
 * tok_language; a warning is issued when the language number is known to
 * neither find_language() nor find_cpxlat(). */
227 lmap
: token
'=' tNUMBER setfile
':' tFILE optcp
{
231 do_add_token
(tok_language
, $1, "language");
232 if
(!find_language
($3) && !find_cpxlat
($3))
233 yywarning
("Language 0x%x not built-in, using codepage %d; use explicit codepage to override", $3, WMC_DEFAULT_CODEPAGE
);
235 | token
'=' tNUMBER setfile
':' error { xyyerror
("Filename expected"); }
236 | token
'=' tNUMBER
error { xyyerror
(err_colon
); }
237 | token
'=' error { xyyerror
(err_number
); }
238 | token
error { xyyerror
(err_assign
); }
/* Optional ':' codepage number of a language definition; yields 0 when absent */
241 optcp
: /* Empty */ { $$
= 0; }
242 |
':' tNUMBER
{ $$
= $2; }
243 |
':' error { xyyerror
("Codepage-number expected"); }
246 /*----------------------------------------------------------------------
/* Error alternative of the preceding list rule (its head is not visible here) */
251 |
error { xyyerror
(err_ident
); }
/* One codepage translation: language '=' number ':' number. Each non-zero
 * codepage must be known to find_codepage(); the triple is then stored with
 * add_cpxlat($1, $3, $5). The test guarding the "already defined" diagnostic
 * is not visible here. */
254 cmap
: clan
'=' tNUMBER
':' tNUMBER
{
255 static const char err_nocp
[] = "Codepage %d not builtin; cannot convert";
257 xyyerror
("Codepage translation already defined for language 0x%x", $1);
258 if
($3 && !find_codepage
($3))
259 xyyerror
(err_nocp
, $3);
260 if
($5 && !find_codepage
($5))
261 xyyerror
(err_nocp
, $5);
262 add_cpxlat
($1, $3, $5);
264 | clan
'=' tNUMBER
':' error { xyyerror
(err_number
); }
265 | clan
'=' tNUMBER
error { xyyerror
(err_colon
); }
266 | clan
'=' error { xyyerror
(err_number
); }
267 | clan
error { xyyerror
(err_assign
); }
/* Language selector of a codepage map: a plain number is passed through. The
 * second alternative (its head is not visible here) requires a symbol whose
 * type is tok_language. */
270 clan
: tNUMBER
{ $$
= $1; }
272 if
($1->type
!= tok_language
)
273 xyyerror
("Language name or code expected");
278 /*----------------------------------------------------------------------
279 * Message-definition parsing
/* A complete message definition. The mid-rule action calling test_id()
 * occupies position $3, which is why 'bodies' is referenced as $4 in the
 * final action. */
281 msg
: msgid sevfacsym
{ test_id
($1); } bodies
{ $$
= complete_msg
($4, $1); }
/* Message ID header: tMSGID '=' id. Per the diagnostic text, IDs above 0xffff
 * are out of range (the guarding test is not visible here). */
284 msgid
: tMSGID
'=' id
{
286 xyyerror
("Message ID value out of range (0x%08x > 0xffff)", $3);
289 | tMSGID
error { xyyerror
(err_assign
); }
/* Message ID value, always recorded in last_id: empty means previous ID plus
 * one, a number sets the ID directly, and '+' number advances the previous ID
 * by that amount. */
292 id
: /* Empty */ { $$
= ++last_id
; }
293 | tNUMBER
{ $$
= last_id
= $1; }
294 |
'+' tNUMBER
{ $$
= last_id
+= $2; }
295 |
'+' error { xyyerror
(err_number
); }
/* Optional severity, facility and symbolic-name headers in any order. The
 * empty alternative clears the have_* flags; each later occurrence reports an
 * error if its flag is already set and then sets it. */
298 sevfacsym: /* Empty */ { have_sev
= have_fac
= have_sym
= 0; }
299 | sevfacsym sev
{ if
(have_sev
) xyyerror
("Severity already defined"); have_sev
= 1; }
300 | sevfacsym fac
{ if
(have_fac
) xyyerror
("Facility already defined"); have_fac
= 1; }
301 | sevfacsym sym
{ if
(have_sym
) xyyerror
("Symbolname already defined"); have_sym
= 1; }
/* Symbolic name header: tSYMNAME '=' identifier; the identifier is kept in
 * last_sym. */
304 sym
: tSYMNAME
'=' tIDENT
{ last_sym
= $3; }
305 | tSYMNAME
'=' error { xyyerror
(err_ident
); }
306 | tSYMNAME
error { xyyerror
(err_assign
); }
/* Severity header: tSEVERITY '=' token. The name is resolved with
 * lookup_token(), must be of type tok_severity, and its value is stored in
 * last_sev. The test guarding the "Undefined severityname" diagnostic is not
 * visible here. */
309 sev
: tSEVERITY
'=' token
{
310 token_t
*tok
= lookup_token
($3->name
);
312 xyyerror
("Undefined severityname");
313 if
(tok
->type
!= tok_severity
)
314 xyyerror
("Identifier is not of class 'severity'");
315 last_sev
= tok
->token
;
317 | tSEVERITY
'=' error { xyyerror
(err_ident
); }
318 | tSEVERITY
error { xyyerror
(err_assign
); }
/* Facility header: tFACILITY '=' token. The name is resolved with
 * lookup_token(), must be of type tok_facility, and its value is stored in
 * last_fac. The test guarding the "Undefined facilityname" diagnostic is not
 * visible here. */
321 fac
: tFACILITY
'=' token
{
322 token_t
*tok
= lookup_token
($3->name
);
324 xyyerror
("Undefined facilityname");
325 if
(tok
->type
!= tok_facility
)
326 xyyerror
("Identifier is not of class 'facility'");
327 last_fac
= tok
->token
;
329 | tFACILITY
'=' error { xyyerror
(err_ident
); }
330 | tFACILITY
error { xyyerror
(err_assign
); }
333 /*----------------------------------------------------------------------
334 * Message-text parsing
/* One or more language-specific message bodies, accumulated with
 * add_lanmsg(); the first body is added with a NULL message so that
 * add_lanmsg() starts a fresh one. */
336 bodies
: body
{ $$
= add_lanmsg
(NULL
, $1); }
337 | bodies body
{ $$
= add_lanmsg
($1, $2); }
338 |
error { xyyerror
("'Language=...' (start of message text-definition) expected"); }
/* A single message body: language header, text lines, terminator. The empty
 * 'setline' rule sits at $2 and raises want_line, so the text is $3. */
341 body
: lang setline lines tMSGEND
{ $$
= new_lanmsg
(&$1, $3); }
345 * The newline is to be able to set the codepage
346 * to the language based codepage for the next
347 * message to be parsed.
/* Language header of a message body. The empty 'setnl' rule sits at $2, so
 * the language token is $4. The token must resolve to type tok_language.
 * Codepage selection as far as visible here: an explicit translation from
 * find_cpxlat() supplies cpin; otherwise, when the token has no codepage,
 * either WMC_DEFAULT_CODEPAGE or the doscp of the find_language() result is
 * used (the test choosing between the two is not visible); otherwise the
 * token's own codepage applies. The language value is stored in $$.language. */
349 lang
: tLANGUAGE setnl
'=' token tNL
{
350 token_t
*tok
= lookup_token
($4->name
);
353 xyyerror
("Undefined language");
354 if
(tok
->type
!= tok_language
)
355 xyyerror
("Identifier is not of class 'language'");
356 if
((cpx
= find_cpxlat
(tok
->token
)))
358 set_codepage
($$.codepage
= cpx
->cpin
);
360 else if
(!tok
->codepage
)
362 const language_t
*lan
= find_language
(tok
->token
);
365 /* Just set default; warning was given while parsing languagenames */
366 set_codepage
($$.codepage
= WMC_DEFAULT_CODEPAGE
);
370 /* The default seems to be to use the DOS codepage... */
371 set_codepage
($$.codepage
= lan
->doscp
);
375 set_codepage
($$.codepage
= tok
->codepage
);
376 $$.language
= tok
->token
;
378 | tLANGUAGE setnl
'=' token
error { xyyerror
("Missing newline"); }
379 | tLANGUAGE setnl
'=' error { xyyerror
(err_ident
); }
380 | tLANGUAGE
error { xyyerror
(err_assign
); }
/* One or more text lines of a message; successive lines are joined with
 * merge(). */
383 lines
: tLINE
{ $$
= $1; }
384 | lines tLINE
{ $$
= merge
($1, $2); }
385 |
error { xyyerror
(err_msg
); }
386 | lines
error { xyyerror
(err_msg
); }
389 /*----------------------------------------------------------------------
/* A name used in a definition: a plain identifier gets a freshly allocated
 * token_t with only 'name' assigned here, while tTOKEN is passed through
 * unchanged. */
392 token
: tIDENT
{ $$
= xmalloc
(sizeof
(token_t
)); $$
->name
= $1; }
393 | tTOKEN
{ $$
= $1; }
/* Empty helper rules: reducing one raises the matching scanner switch at that
 * point in the enclosing rule. */
396 setnl
: /* Empty */ { want_nl
= 1; }
399 setline
: /* Empty */ { want_line
= 1; }
402 setfile
: /* Empty */ { want_file
= 1; }
/* Append s2 to s1: s1 is resized with xrealloc() to hold both strings plus a
 * terminator, and s2 is copied in at offset l1 (the old length of s1). */
407 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
)
409 int l1
= unistrlen
(s1
);
410 int l2
= unistrlen
(s2
);
411 s1
= xrealloc
(s1
, (l1
+ l2
+ 1) * sizeof
(*s1
));
412 unistrcpy
(s1
+l1
, s2
);
/* Register 'tok' as a token of class 'type' through add_token(). The name is
 * first looked up with lookup_token() so clashes with an existing token can
 * be diagnosed; 'code' is the class name printed in the "Redefinition of %s"
 * message. Not all tests guarding the diagnostics are visible here. */
417 static void do_add_token
(tok_e type
, token_t
*tok
, const char *code
)
419 token_t
*tp
= lookup_token
(tok
->name
);
422 if
(tok
->type
!= type
)
423 yywarning
("Type change in token");
425 xyyerror
("Overlapping token not the same");
426 /* else it's already defined and changed */
428 xyyerror
("Redefinition of %s", code
);
433 add_token
(type
, tok
->name
, tok
->token
, tok
->codepage
, tok
->alias
, 1);
/* Allocate a lanmsg_t for one language's message text. Language and codepage
 * are copied from 'lcp'; 'len' is the text length including the terminator.
 * A warning is issued for exceptionally long messages (the threshold test is
 * not visible here). */
438 static lanmsg_t
*new_lanmsg
(lan_cp_t
*lcp
, WCHAR
*msg
)
440 lanmsg_t
*lmp
= (lanmsg_t
*)xmalloc
(sizeof
(lanmsg_t
));
441 lmp
->lan
= lcp
->language
;
442 lmp
->cp
= lcp
->codepage
;
444 lmp
->len
= unistrlen
(msg
) + 1; /* Include termination */
446 yywarning
("Message exceptionally long; might be a missing termination");
/* Append 'lanmsg' to the language list of 'msg'. The msgs array is grown by
 * one slot and the new entry stored at index nmsgs; the earlier entries are
 * then scanned and an error is reported if one already has the same language.
 * A msg_t is allocated here as well - presumably only when 'msg' is NULL (the
 * guarding test and the nmsgs increment are not visible here). */
450 static msg_t
*add_lanmsg
(msg_t
*msg
, lanmsg_t
*lanmsg
)
454 msg
= xmalloc
(sizeof
(msg_t
));
455 msg
->msgs
= xrealloc
(msg
->msgs
, (msg
->nmsgs
+1) * sizeof
(*(msg
->msgs
)));
456 msg
->msgs
[msg
->nmsgs
] = lanmsg
;
458 for
(i
= 0; i
< msg
->nmsgs
-1; i
++)
460 if
(msg
->msgs
[i
]->lan
== lanmsg
->lan
)
461 xyyerror
("Message for language 0x%x already defined", lanmsg
->lan
);
/* qsort() comparator for arrays of lanmsg_t pointers: orders by ascending
 * 'lan', using the difference of the two values as the result. */
466 static int sort_lanmsg
(const void *p1
, const void *p2
)
468 return
(*(lanmsg_t
**)p1
)->lan
- (*(lanmsg_t
**)p2
)->lan
;
/* Finish a parsed message: sort its per-language texts by language with
 * sort_lanmsg() and compose 'realid' from the ID, last_sev (shifted to bit 30)
 * and last_fac (shifted to bit 16). Bit 29 is set under a condition that is
 * not visible here, as is the test guarding the "No symbolic name"
 * diagnostic. */
471 static msg_t
*complete_msg
(msg_t
*mp
, int id
)
478 xyyerror
("No symbolic name defined for message id %d", id
);
481 qsort
(mp
->msgs
, mp
->nmsgs
, sizeof
(*(mp
->msgs
)), sort_lanmsg
);
/* NOTE(review): last_sev is a signed int; for severity values 2 and 3 the
 * shift by 30 reaches the sign bit - consider an unsigned operand; confirm. */
482 mp
->realid
= id |
(last_sev
<< 30) |
(last_fac
<< 16);
484 mp
->realid |
= 1 << 29;
/* Allocate a node_t and add it to the parse list. The visible statements
 * either link the node behind nodetail or make it both nodehead and nodetail;
 * the branch test, presumably "list already non-empty", and the stores of
 * 'type' and 'p' are not visible here. */
490 static void add_node
(node_e type
, void *p
)
492 node_t
*ndp
= (node_t
*)xmalloc
(sizeof
(node_t
));
498 ndp
->prev
= nodetail
;
499 nodetail
->next
= ndp
;
504 nodehead
= nodetail
= ndp
;
/* Walk the node list and report an error when an existing message has the
 * same ID as 'id' together with the current last_sev and last_fac. The
 * statement under the nd_msg type test is not visible here (presumably it
 * skips non-message nodes). */
508 static void test_id
(int id
)
511 for
(ndp
= nodehead
; ndp
; ndp
= ndp
->next
)
513 if
(ndp
->type
!= nd_msg
)
515 if
(ndp
->u.msg
->id
== id
&& ndp
->u.msg
->sev
== last_sev
&& ndp
->u.msg
->fac
== last_fac
)
516 xyyerror
("MessageId %d with facility 0x%x and severity 0x%x already defined", id
, last_fac
, last_sev
);
/* Check the messages in 'head' for language coverage. The node loop compares
 * each message's nmsgs against a current candidate 'msg'; the later loop
 * compares the language lists of two messages (m1, m2) index by index and
 * reports err_missing through error() for a language that one of them lacks.
 * The caller treats a zero return as "No messages defined". Most of the
 * control flow is not visible here. */
520 static int check_languages
(node_t
*head
)
522 static const char err_missing
[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
527 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
529 if
(ndp
->type
!= nd_msg
)
540 if
(ndp
->u.msg
->nmsgs
> msg
->nmsgs
)
551 for
(i
= 0; i
< m1
->nmsgs
; i
++)
554 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
555 else if
(m1
->msgs
[i
]->lan
< m2
->msgs
[i
]->lan
)
556 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
557 else if
(m1
->msgs
[i
]->lan
> m2
->msgs
[i
]->lan
)
558 error(err_missing
, m2
->msgs
[i
]->lan
, m1
->id
, m1
->fac
, m1
->sev
);
/* MSGRID(x) reads the realid of the msg_t that the array element x points to.
 * sort_msg() is a qsort() comparator for arrays of msg_t pointers ordering by
 * ascending realid; it uses explicit comparisons instead of the subtraction
 * kept in the trailing comment. */
566 #define MSGRID(x) ((*(msg_t **)(x))->realid)
567 static int sort_msg
(const void *p1
, const void *p2
)
569 return MSGRID
(p1
) > MSGRID
(p2
) ?
1 : (MSGRID
(p1
) == MSGRID
(p2
) ?
0 : -1);
570 /* return (*(msg_t **)p1)->realid - (*(msg_t **)p1)->realid; */
574 * block_messages() basically transposes the messages
575 * from ID/language based list to a language/ID
/* Build the per-language block list from the node list 'head'.
 * All nd_msg nodes are gathered in msgtab and sorted by realid with
 * sort_msg(). For every language index 'nl' of the first message a lan_blk_t
 * is allocated and linked into the lblkhead/lblktail list; messages whose
 * realid directly follows the current block's idhi extend that block, any
 * other realid opens a new block. Each entry adds its DWORD-aligned text size
 * (length times 'factor', which is 2 when unicodeout is set) plus 4 bytes.
 * msgtab[0] is dereferenced unconditionally, so at least one message node must
 * exist - presumably ensured by the check_languages() call made by the caller;
 * confirm. Several statements (nblk/idhi updates, branch structure, return)
 * are not visible here. */
578 static lan_blk_t
*block_messages
(node_t
*head
)
581 lan_blk_t
*lblktail
= NULL
;
582 lan_blk_t
*lblkhead
= NULL
;
583 msg_t
**msgtab
= NULL
;
588 int factor
= unicodeout ?
2 : 1;
590 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
592 if
(ndp
->type
!= nd_msg
)
594 msgtab
= xrealloc
(msgtab
, (nmsg
+1) * sizeof
(*msgtab
));
595 msgtab
[nmsg
++] = ndp
->u.msg
;
599 qsort
(msgtab
, nmsg
, sizeof
(*msgtab
), sort_msg
);
601 for
(nl
= 0; nl
< msgtab
[0]->nmsgs
; nl
++) /* This should be equal for all after check_languages() */
603 lbp
= xmalloc
(sizeof
(lan_blk_t
));
607 lblkhead
= lblktail
= lbp
;
611 lblktail
->next
= lbp
;
612 lbp
->prev
= lblktail
;
616 lbp
->blks
= xmalloc
(sizeof
(*lbp
->blks
));
617 lbp
->blks
[0].idlo
= msgtab
[0]->realid
;
618 lbp
->blks
[0].idhi
= msgtab
[0]->realid
;
619 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
620 lbp
->blks
[0].size
= ((factor
* msgtab
[0]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
621 lbp
->blks
[0].msgs
= xmalloc
(sizeof
(*lbp
->blks
[0].msgs
));
622 lbp
->blks
[0].nmsg
= 1;
623 lbp
->blks
[0].msgs
[0] = msgtab
[0]->msgs
[nl
];
624 lbp
->lan
= msgtab
[0]->msgs
[nl
]->lan
;
626 for
(i
= 1; i
< nmsg
; i
++)
628 block_t
*blk
= &(lbp
->blks
[lbp
->nblk
-1]);
629 if
(msgtab
[i
]->realid
== blk
->idhi
+1)
631 blk
->size
+= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
633 blk
->msgs
= xrealloc
(blk
->msgs
, (blk
->nmsg
+1) * sizeof
(*blk
->msgs
));
634 blk
->msgs
[blk
->nmsg
++] = msgtab
[i
]->msgs
[nl
];
639 lbp
->blks
= xrealloc
(lbp
->blks
, lbp
->nblk
* sizeof
(*lbp
->blks
));
640 blk
= &(lbp
->blks
[lbp
->nblk
-1]);
641 blk
->idlo
= msgtab
[i
]->realid
;
642 blk
->idhi
= msgtab
[i
]->realid
;
643 blk
->size
= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
644 blk
->msgs
= xmalloc
(sizeof
(*blk
->msgs
));
646 blk
->msgs
[0] = msgtab
[i
]->msgs
[nl
];
/* Comparator over cp_xlat_t elements ordering by ascending 'lan'; shared by
 * the qsort() in add_cpxlat() and the bsearch() in find_cpxlat(). */
654 static int sc_xlat
(const void *p1
, const void *p2
)
656 return
((cp_xlat_t
*)p1
)->lan
- ((cp_xlat_t
*)p2
)->lan
;
/* Add a codepage translation for language 'lan' (input codepage 'cpin',
 * output codepage 'cpout'): the table is grown by one element, the new entry
 * is written at index ncpxlattab, and the table is re-sorted by language with
 * sc_xlat(), the ordering find_cpxlat()'s bsearch() relies on. The increment
 * of ncpxlattab is not visible here. */
659 static void add_cpxlat
(int lan
, int cpin
, int cpout
)
661 cpxlattab
= xrealloc
(cpxlattab
, (ncpxlattab
+1) * sizeof
(*cpxlattab
));
662 cpxlattab
[ncpxlattab
].lan
= lan
;
663 cpxlattab
[ncpxlattab
].cpin
= cpin
;
664 cpxlattab
[ncpxlattab
].cpout
= cpout
;
666 qsort
(cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);
/* Look up the codepage translation entry for language 'lan'. Returns NULL
 * when no table has been allocated yet; otherwise returns the bsearch()
 * result for the key 't' compared with sc_xlat() (the setup of 't' is not
 * visible here). */
669 static cp_xlat_t
*find_cpxlat
(int lan
)
673 if
(!cpxlattab
) return NULL
;
676 return
(cp_xlat_t
*)bsearch
(&t
, cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);