2 * Wine Message Compiler parser
4 * Copyright 2000 Bertho A. Stultiens (BS)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * The basic grammar of the file is yet another example of, humpf,
23 * design. There is a mix of context-insensitive and -sensitive
24 * stuff, which makes it rather complicated.
25 * The header definitions are all context-insensitive because they have
26 * delimited arguments, whereas the message headers are (semi-) context-
27 * sensitive and the messages themselves are, well, RFC82[12] delimited.
28 * This mixture seems to originate from the time that ms and ibm were
29 * good friends and developing os/2 according to the "compatibility"
30 * switch and reading some comments here and there.
32 * I'll ignore most of the complications and concentrate on the concept
33 * which allows me to use yacc. Basically, everything is context-
34 * insensitive now, with the exception of the message-text itself and
35 * the preceding language declaration.
/* Diagnostic texts that the grammar's error productions pass to xyyerror(). */
51 static const char err_syntax
[] = "Syntax error\n";
52 static const char err_number
[] = "Number expected\n";
53 static const char err_ident
[] = "Identifier expected\n";
54 static const char err_assign
[] = "'=' expected\n";
55 static const char err_popen
[] = "'(' expected\n";
56 static const char err_pclose
[] = "')' expected\n";
57 static const char err_colon
[] = "':' expected\n";
58 static const char err_msg
[] = "Message expected\n";
60 /* Scanner switches */
/* Each flag is set to 1 by an empty grammar rule (setnl, setline, setfile). */
61 int want_nl
= 0; /* Request next newline */
62 int want_line
= 0; /* Request next complete line */
63 int want_file
= 0; /* Request next ident as filename */
/*
 * Parse results. add_node() links new elements through nodehead/nodetail;
 * the grammar assigns the result of block_messages(nodehead) to lanblockhead.
 */
65 struct node
*nodehead
= NULL
; /* The list of all parsed elements */
66 static struct node
*nodetail
; /* Element that add_node() chains the next node after */
67 struct lan_blk
*lanblockhead
; /* List of parsed elements transposed */
/*
 * Output formatting state taken from the header definitions: the tBASE rule
 * carries the "Numberbase must be 8, 10 or 16" diagnostic and the tTYPEDEF
 * rule assigns its identifier to cast.
 */
69 static int base
= 16; /* Current printout base to use (8, 10 or 16) */
70 static WCHAR
*cast
= NULL
; /* Current typecast to use */
/*
 * State carried from one message definition to the next. The last_* values
 * are written by the id, sev, fac and sym rules; the have_* flags are reset
 * to 0 by the empty sevfacsym alternative and set to 1 when the respective
 * item is parsed.
 */
72 static int last_id
= 0; /* The last message ID parsed */
73 static int last_sev
= 0; /* Last severity code parsed */
74 static int last_fac
= 0; /* Last facility code parsed */
75 static WCHAR
*last_sym
= NULL
;/* Last alias symbol parsed */
76 static int have_sev
; /* Set if severity parsed for current message */
77 static int have_fac
; /* Set if facility parsed for current message */
78 static int have_sym
; /* Set if symbol parsed for current message */
/* Filled by add_cpxlat() and searched by find_cpxlat(). */
80 static struct cp_xlat
*cpxlattab
= NULL
; /* Codepage translation table */
81 static int ncpxlattab
= 0; /* Element count passed to qsort()/bsearch() for cpxlattab */
/* Forward declarations of the helper functions used by the grammar actions. */
84 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
);
85 static struct lanmsg
*new_lanmsg
(struct lan_cp
*lcp
, WCHAR
*msg
);
86 static struct msg
*add_lanmsg
(struct msg
*msg
, struct lanmsg
*lanmsg
);
87 static struct msg
*complete_msg
(struct msg
*msg
, int id
);
88 static void add_node
(enum node_type type
, void *p
);
89 static void do_add_token
(enum tok_enum type
, struct token
*tok
, const char *code
);
90 static void test_id
(int id
);
91 static int check_languages
(struct node
*head
);
92 static struct lan_blk
*block_messages
(struct node
*head
);
93 static void add_cpxlat
(int lan
, int cpin
, int cpout
);
94 static struct cp_xlat
*find_cpxlat
(int lan
);
98 %define api.prefix
{mcy_
}
110 %token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
111 %token tTYPEDEF tNL tSYMNAME tMSGEND
112 %token tSEVERITY tFACILITY tLANGUAGE tMSGID
113 %token
<str
> tIDENT tLINE tFILE tCOMMENT
117 %type
<str
> alias lines
118 %type
<num
> optcp id msgid clan
121 %type
<msg
> bodies msg
126 if
(!check_languages
(nodehead
))
127 xyyerror
("No messages defined\n");
128 lanblockhead
= block_messages
(nodehead
);
137 | msg
{ add_node
(nd_msg
, $1); }
138 | tCOMMENT
{ add_node
(nd_comment
, $1); }
139 |
error { xyyerror
(err_syntax
); /* `Catch all' error */ }
142 global
: tSEVNAMES
'=' '(' smaps
')'
143 | tSEVNAMES
'=' '(' smaps
error { xyyerror
(err_pclose
); }
144 | tSEVNAMES
'=' error { xyyerror
(err_popen
); }
145 | tSEVNAMES
error { xyyerror
(err_assign
); }
146 | tFACNAMES
'=' '(' fmaps
')'
147 | tFACNAMES
'=' '(' fmaps
error { xyyerror
(err_pclose
); }
148 | tFACNAMES
'=' error { xyyerror
(err_popen
); }
149 | tFACNAMES
error { xyyerror
(err_assign
); }
150 | tLANNAMES
'=' '(' lmaps
')'
151 | tLANNAMES
'=' '(' lmaps
error { xyyerror
(err_pclose
); }
152 | tLANNAMES
'=' error { xyyerror
(err_popen
); }
153 | tLANNAMES
error { xyyerror
(err_assign
); }
154 | tCODEPAGE
'=' '(' cmaps
')'
155 | tCODEPAGE
'=' '(' cmaps
error { xyyerror
(err_pclose
); }
156 | tCODEPAGE
'=' error { xyyerror
(err_popen
); }
157 | tCODEPAGE
error { xyyerror
(err_assign
); }
158 | tTYPEDEF
'=' tIDENT
{ cast
= $3; }
159 | tTYPEDEF
'=' error { xyyerror
(err_number
); }
160 | tTYPEDEF
error { xyyerror
(err_assign
); }
161 | tBASE
'=' tNUMBER
{
170 xyyerror
("Numberbase must be 8, 10 or 16\n");
173 | tBASE
'=' error { xyyerror
(err_number
); }
174 | tBASE
error { xyyerror
(err_assign
); }
177 /*----------------------------------------------------------------------
178 * SeverityNames mapping
182 |
error { xyyerror
(err_ident
); }
185 smap
: token
'=' tNUMBER alias
{
189 xyyerror
("Severity value out of range (0x%08x > 0x3)\n", $3);
190 do_add_token
(tok_severity
, $1, "severity");
192 | token
'=' error { xyyerror
(err_number
); }
193 | token
error { xyyerror
(err_assign
); }
196 /*----------------------------------------------------------------------
197 * FacilityNames mapping
201 |
error { xyyerror
(err_ident
); }
204 fmap
: token
'=' tNUMBER alias
{
208 xyyerror
("Facility value out of range (0x%08x > 0xfff)\n", $3);
209 do_add_token
(tok_facility
, $1, "facility");
211 | token
'=' error { xyyerror
(err_number
); }
212 | token
error { xyyerror
(err_assign
); }
215 alias
: /* Empty */ { $$
= NULL
; }
216 |
':' tIDENT
{ $$
= $2; }
217 |
':' error { xyyerror
(err_ident
); }
220 /*----------------------------------------------------------------------
221 * LanguageNames mapping
225 |
error { xyyerror
(err_ident
); }
228 lmap
: token
'=' tNUMBER setfile
':' tFILE optcp
{
232 do_add_token
(tok_language
, $1, "language");
233 if
(!find_language
($3) && !find_cpxlat
($3))
234 mcy_warning
("Language 0x%x not built-in, using codepage %d; use explicit codepage to override\n", $3, WMC_DEFAULT_CODEPAGE
);
236 | token
'=' tNUMBER setfile
':' error { xyyerror
("Filename expected\n"); }
237 | token
'=' tNUMBER
error { xyyerror
(err_colon
); }
238 | token
'=' error { xyyerror
(err_number
); }
239 | token
error { xyyerror
(err_assign
); }
242 optcp
: /* Empty */ { $$
= 0; }
243 |
':' tNUMBER
{ $$
= $2; }
244 |
':' error { xyyerror
("Codepage-number expected\n"); }
247 /*----------------------------------------------------------------------
252 |
error { xyyerror
(err_ident
); }
255 cmap
: clan
'=' tNUMBER
':' tNUMBER
{
256 static const char err_nocp
[] = "Codepage %d not builtin; cannot convert\n";
258 xyyerror
("Codepage translation already defined for language 0x%x\n", $1);
259 if
($3 && !is_valid_codepage
($3))
260 xyyerror
(err_nocp
, $3);
261 if
($5 && !is_valid_codepage
($5))
262 xyyerror
(err_nocp
, $5);
263 add_cpxlat
($1, $3, $5);
265 | clan
'=' tNUMBER
':' error { xyyerror
(err_number
); }
266 | clan
'=' tNUMBER
error { xyyerror
(err_colon
); }
267 | clan
'=' error { xyyerror
(err_number
); }
268 | clan
error { xyyerror
(err_assign
); }
271 clan
: tNUMBER
{ $$
= $1; }
273 if
($1->type
!= tok_language
)
274 xyyerror
("Language name or code expected\n");
279 /*----------------------------------------------------------------------
280 * Message-definition parsing
282 msg
: msgid sevfacsym
{ test_id
($1); } bodies
{ $$
= complete_msg
($4, $1); }
285 msgid
: tMSGID
'=' id
{
287 xyyerror
("Message ID value out of range (0x%08x > 0xffff)\n", $3);
290 | tMSGID
error { xyyerror
(err_assign
); }
293 id
: /* Empty */ { $$
= ++last_id
; }
294 | tNUMBER
{ $$
= last_id
= $1; }
295 |
'+' tNUMBER
{ $$
= last_id
+= $2; }
296 |
'+' error { xyyerror
(err_number
); }
299 sevfacsym: /* Empty */ { have_sev
= have_fac
= have_sym
= 0; }
300 | sevfacsym sev
{ if
(have_sev
) xyyerror
("Severity already defined\n"); have_sev
= 1; }
301 | sevfacsym fac
{ if
(have_fac
) xyyerror
("Facility already defined\n"); have_fac
= 1; }
302 | sevfacsym sym
{ if
(have_sym
) xyyerror
("Symbolname already defined\n"); have_sym
= 1; }
305 sym
: tSYMNAME
'=' tIDENT
{ last_sym
= $3; }
306 | tSYMNAME
'=' error { xyyerror
(err_ident
); }
307 | tSYMNAME
error { xyyerror
(err_assign
); }
310 sev
: tSEVERITY
'=' token
{
311 struct token
*tok
= lookup_token
($3->name
);
313 xyyerror
("Undefined severityname\n");
314 if
(tok
->type
!= tok_severity
)
315 xyyerror
("Identifier is not of class 'severity'\n");
316 last_sev
= tok
->token
;
318 | tSEVERITY
'=' error { xyyerror
(err_ident
); }
319 | tSEVERITY
error { xyyerror
(err_assign
); }
322 fac
: tFACILITY
'=' token
{
323 struct token
*tok
= lookup_token
($3->name
);
325 xyyerror
("Undefined facilityname\n");
326 if
(tok
->type
!= tok_facility
)
327 xyyerror
("Identifier is not of class 'facility'\n");
328 last_fac
= tok
->token
;
330 | tFACILITY
'=' error { xyyerror
(err_ident
); }
331 | tFACILITY
error { xyyerror
(err_assign
); }
334 /*----------------------------------------------------------------------
335 * Message-text parsing
337 bodies
: body
{ $$
= add_lanmsg
(NULL
, $1); }
338 | bodies body
{ $$
= add_lanmsg
($1, $2); }
339 |
error { xyyerror
("'Language=...' (start of message text-definition) expected\n"); }
342 body
: lang setline lines tMSGEND
{ $$
= new_lanmsg
(&$1, $3); }
346 * The newline is to be able to set the codepage
347 * to the language based codepage for the next
348 * message to be parsed.
350 lang
: tLANGUAGE setnl
'=' token tNL
{
351 struct token
*tok
= lookup_token
($4->name
);
354 xyyerror
("Undefined language\n");
355 if
(tok
->type
!= tok_language
)
356 xyyerror
("Identifier is not of class 'language'\n");
357 if
((cpx
= find_cpxlat
(tok
->token
)))
359 set_codepage
($$.codepage
= cpx
->cpin
);
361 else if
(!tok
->codepage
)
363 const struct language
*lan
= find_language
(tok
->token
);
366 /* Just set default; warning was given while parsing languagenames */
367 set_codepage
($$.codepage
= WMC_DEFAULT_CODEPAGE
);
371 /* The default seems to be to use the DOS codepage... */
372 set_codepage
($$.codepage
= lan
->doscp
);
376 set_codepage
($$.codepage
= tok
->codepage
);
377 $$.language
= tok
->token
;
379 | tLANGUAGE setnl
'=' token
error { xyyerror
("Missing newline\n"); }
380 | tLANGUAGE setnl
'=' error { xyyerror
(err_ident
); }
381 | tLANGUAGE
error { xyyerror
(err_assign
); }
384 lines
: tLINE
{ $$
= $1; }
385 | lines tLINE
{ $$
= merge
($1, $2); }
386 |
error { xyyerror
(err_msg
); }
387 | lines
error { xyyerror
(err_msg
); }
390 /*----------------------------------------------------------------------
393 token
: tIDENT
{ $$
= xmalloc
(sizeof
(struct token
)); memset
($$
,0,sizeof
(*$$
)); $$
->name
= $1; }
394 | tTOKEN
{ $$
= $1; }
397 setnl
: /* Empty */ { want_nl
= 1; }
400 setline
: /* Empty */ { want_line
= 1; }
403 setfile
: /* Empty */ { want_file
= 1; }
/*
 * merge: append the string s2 to the string s1.
 *
 * s1 is resized with xrealloc() and s2 is copied behind its current text.
 * The `lines' rule assigns the result to $$ when a further tLINE is read.
 * NOTE(review): the return statement and any release of s2 are not visible
 * in this excerpt; presumably the resized s1 is returned - confirm.
 */
408 static WCHAR
*merge
(WCHAR
*s1
, WCHAR
*s2
)
410 int l1
= unistrlen
(s1
);
411 int l2
= unistrlen
(s2
);
/* Room for both strings plus one terminating element. */
412 s1
= xrealloc
(s1
, (l1
+ l2
+ 1) * sizeof
(*s1
));
/* s2 goes at offset l1, i.e. behind the text s1 held before. */
413 unistrcpy
(s1
+l1
, s2
);
/*
 * do_add_token: register a token from a names mapping under class `type'.
 *
 * The name is first looked up with lookup_token(). The visible diagnostics
 * are a warning when tok->type differs from `type', an error for an
 * overlapping token that is not the same, and an error for a redefinition,
 * where `code' supplies the class name printed in that message. add_token()
 * is called with the fields of tok and a final argument of 1.
 * NOTE(review): the conditions that choose between the error paths and the
 * add_token() call are not visible in this excerpt.
 */
418 static void do_add_token
(enum tok_enum type
, struct token
*tok
, const char *code
)
420 struct token
*tp
= lookup_token
(tok
->name
);
423 if
(tok
->type
!= type
)
424 mcy_warning
("Type change in token\n");
426 xyyerror
("Overlapping token not the same\n");
427 /* else its already defined and changed */
429 xyyerror
("Redefinition of %s\n", code
);
434 add_token
(type
, tok
->name
, tok
->token
, tok
->codepage
, tok
->alias
, 1);
/*
 * new_lanmsg: allocate a lanmsg describing one message text.
 *
 * The language and codepage are taken from lcp, the length is the string
 * length of msg plus one, and the current input_name/line_number are
 * recorded as the origin of the text.
 * NOTE(review): the line storing msg itself, the condition for the
 * "exceptionally long" warning and the return statement are not visible in
 * this excerpt.
 */
439 static struct lanmsg
*new_lanmsg
(struct lan_cp
*lcp
, WCHAR
*msg
)
441 struct lanmsg
*lmp
= xmalloc
(sizeof
(*lmp
));
442 lmp
->lan
= lcp
->language
;
443 lmp
->cp
= lcp
->codepage
;
445 lmp
->len
= unistrlen
(msg
) + 1; /* Include termination */
446 lmp
->file
= input_name
;
447 lmp
->line
= line_number
;
449 mcy_warning
("Message exceptionally long; might be a missing termination\n");
/*
 * add_lanmsg: append lanmsg to the per-language text array of msg.
 *
 * The `bodies' rule passes NULL for the first body of a message; a zeroed
 * msg is allocated below (presumably only in that case - the guarding
 * condition is not visible). The array is grown by one element, the new
 * entry is stored at index nmsgs, and the earlier entries are scanned for
 * one with the same language, which is reported as an error.
 * NOTE(review): the update of nmsgs and the return statement are not
 * visible in this excerpt.
 */
453 static struct msg
*add_lanmsg
(struct msg
*msg
, struct lanmsg
*lanmsg
)
458 msg
= xmalloc
(sizeof
(*msg
));
459 memset
( msg
, 0, sizeof
(*msg
) );
461 msg
->msgs
= xrealloc
(msg
->msgs
, (msg
->nmsgs
+1) * sizeof
(*(msg
->msgs
)));
462 msg
->msgs
[msg
->nmsgs
] = lanmsg
;
/* Duplicate-language check against the entries below index nmsgs-1. */
464 for
(i
= 0; i
< msg
->nmsgs
-1; i
++)
466 if
(msg
->msgs
[i
]->lan
== lanmsg
->lan
)
467 xyyerror
("Message for language 0x%x already defined\n", lanmsg
->lan
);
/*
 * sort_lanmsg: qsort() comparison callback used by complete_msg().
 *
 * p1 and p2 point at array elements that are themselves pointers to
 * struct lanmsg; the result is the difference of their lan members, which
 * gives an ascending order by language.
 */
472 static int sort_lanmsg
(const void *p1
, const void *p2
)
474 return
(*(const struct lanmsg
* const *)p1
)->lan
- (*(const struct lanmsg
* const*)p2
)->lan
;
/*
 * complete_msg: finish a message once all of its bodies are parsed.
 *
 * The language entries are sorted with sort_lanmsg() and realid is built
 * from id, last_sev shifted left by 30 and last_fac shifted left by 16.
 * NOTE(review): the conditions guarding the "No symbolic name" error and
 * the setting of bit 29, the remaining field assignments and the return
 * statement are not visible in this excerpt.
 */
477 static struct msg
*complete_msg
(struct msg
*mp
, int id
)
484 xyyerror
("No symbolic name defined for message id %d\n", id
);
487 qsort
(mp
->msgs
, mp
->nmsgs
, sizeof
(*(mp
->msgs
)), sort_lanmsg
);
488 mp
->realid
= id |
(last_sev
<< 30) |
(last_fac
<< 16);
490 mp
->realid |
= 1 << 29;
/*
 * add_node: allocate a zeroed node and link it into the global node list.
 *
 * The node is either chained behind nodetail or becomes both nodehead and
 * nodetail (presumably when the list is still empty - the branch condition
 * is not visible).
 * NOTE(review): the lines that store `type' and `p' in the node are not
 * visible in this excerpt.
 */
496 static void add_node
(enum node_type type
, void *p
)
498 struct node
*ndp
= xmalloc
(sizeof
(*ndp
));
499 memset
( ndp
, 0, sizeof
(*ndp
) );
505 ndp
->prev
= nodetail
;
506 nodetail
->next
= ndp
;
511 nodehead
= nodetail
= ndp
;
/*
 * test_id: report an error if a message with this identity already exists.
 *
 * Walks the list starting at nodehead and compares id, last_sev and
 * last_fac with the id, sev and fac of each message node. The node type is
 * tested against nd_msg first (presumably to skip other nodes - the
 * statement under that test is not visible).
 */
515 static void test_id
(int id
)
518 for
(ndp
= nodehead
; ndp
; ndp
= ndp
->next
)
520 if
(ndp
->type
!= nd_msg
)
522 if
(ndp
->u.msg
->id
== id
&& ndp
->u.msg
->sev
== last_sev
&& ndp
->u.msg
->fac
== last_fac
)
523 xyyerror
("MessageId %d with facility 0x%x and severity 0x%x already defined\n", id
, last_fac
, last_sev
);
/*
 * check_languages: verify that the messages agree on their languages.
 *
 * Walks the list from head, comparing the nmsgs count of message nodes with
 * that of a remembered message, then compares the lan values of two
 * messages m1 and m2 index by index and reports each mismatch through
 * error() with err_missing. The grammar treats a zero return value as
 * "No messages defined".
 * NOTE(review): how m1/m2 are chosen, the condition of the first error()
 * call and the computation of the return value are not visible in this
 * excerpt.
 */
527 static int check_languages
(struct node
*head
)
529 static const char err_missing
[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x\n";
532 struct msg
*msg
= NULL
;
534 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
536 if
(ndp
->type
!= nd_msg
)
547 if
(ndp
->u.msg
->nmsgs
> msg
->nmsgs
)
558 for
(i
= 0; i
< m1
->nmsgs
; i
++)
561 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
/* m1 has a lower language at this index: reported as missing from m2. */
562 else if
(m1
->msgs
[i
]->lan
< m2
->msgs
[i
]->lan
)
563 error(err_missing
, m1
->msgs
[i
]->lan
, m2
->id
, m2
->fac
, m2
->sev
);
/* m2 has the lower language: reported as missing from m1. */
564 else if
(m1
->msgs
[i
]->lan
> m2
->msgs
[i
]->lan
)
565 error(err_missing
, m2
->msgs
[i
]->lan
, m1
->id
, m1
->fac
, m1
->sev
);
/*
 * MSGRID(x) reads realid through a pointer to a `struct msg *' element.
 * sort_msg: qsort() comparison callback used by block_messages(); yields
 * 1, 0 or -1 for a realid that is greater than, equal to or less than the
 * other one, i.e. an ascending order by realid.
 */
573 #define MSGRID(x) ((*(const struct msg * const*)(x))->realid)
574 static int sort_msg
(const void *p1
, const void *p2
)
576 return MSGRID
(p1
) > MSGRID
(p2
) ?
1 : (MSGRID
(p1
) == MSGRID
(p2
) ?
0 : -1);
580 * block_messages() basically transposes the messages
581 * from ID/language based list to a language/ID
/*
 * block_messages: regroup the parsed messages per language.
 *
 * The msg pointers found in the node list are gathered in msgtab and sorted
 * with sort_msg(). For every language index nl of the first sorted message
 * a zeroed lan_blk is allocated and linked through lblkhead/lblktail. Its
 * first block starts with msgtab[0]; each further message either extends
 * the current block, when its realid equals idhi + 1, or is placed in a
 * newly set up block. The grammar stores the result in lanblockhead.
 * NOTE(review): the declaration of `factor', the updates of nblk and idhi
 * and the return statement are not visible in this excerpt.
 */
584 static struct lan_blk
*block_messages
(struct node
*head
)
587 struct lan_blk
*lblktail
= NULL
;
588 struct lan_blk
*lblkhead
= NULL
;
589 struct msg
**msgtab
= NULL
;
/* Gather the messages of the node list in msgtab. */
596 for
(ndp
= head
; ndp
; ndp
= ndp
->next
)
598 if
(ndp
->type
!= nd_msg
)
600 msgtab
= xrealloc
(msgtab
, (nmsg
+1) * sizeof
(*msgtab
));
601 msgtab
[nmsg
++] = ndp
->u.msg
;
605 qsort
(msgtab
, nmsg
, sizeof
(*msgtab
), sort_msg
);
607 for
(nl
= 0; nl
< msgtab
[0]->nmsgs
; nl
++) /* This should be equal for all after check_languages() */
609 lbp
= xmalloc
(sizeof
(*lbp
));
610 memset
( lbp
, 0, sizeof
(*lbp
) );
613 lblkhead
= lblktail
= lbp
;
617 lblktail
->next
= lbp
;
618 lbp
->prev
= lblktail
;
/* The first block of this language starts with the first sorted message. */
622 lbp
->blks
= xmalloc
(sizeof
(*lbp
->blks
));
623 lbp
->blks
[0].idlo
= msgtab
[0]->realid
;
624 lbp
->blks
[0].idhi
= msgtab
[0]->realid
;
625 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
626 lbp
->blks
[0].size
= ((factor
* msgtab
[0]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
627 lbp
->blks
[0].msgs
= xmalloc
(sizeof
(*lbp
->blks
[0].msgs
));
628 lbp
->blks
[0].nmsg
= 1;
629 lbp
->blks
[0].msgs
[0] = msgtab
[0]->msgs
[nl
];
630 lbp
->lan
= msgtab
[0]->msgs
[nl
]->lan
;
632 for
(i
= 1; i
< nmsg
; i
++)
634 struct block
*blk
= &(lbp
->blks
[lbp
->nblk
-1]);
/* A realid directly following idhi extends the current block. */
635 if
(msgtab
[i
]->realid
== blk
->idhi
+1)
637 blk
->size
+= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
639 blk
->msgs
= xrealloc
(blk
->msgs
, (blk
->nmsg
+1) * sizeof
(*blk
->msgs
));
640 blk
->msgs
[blk
->nmsg
++] = msgtab
[i
]->msgs
[nl
];
/* Set-up of a further block starting at this realid (presumably the else branch). */
645 lbp
->blks
= xrealloc
(lbp
->blks
, lbp
->nblk
* sizeof
(*lbp
->blks
));
646 blk
= &(lbp
->blks
[lbp
->nblk
-1]);
647 blk
->idlo
= msgtab
[i
]->realid
;
648 blk
->idhi
= msgtab
[i
]->realid
;
649 blk
->size
= ((factor
* msgtab
[i
]->msgs
[nl
]->len
+ 3) & ~
3) + 4;
650 blk
->msgs
= xmalloc
(sizeof
(*blk
->msgs
));
652 blk
->msgs
[0] = msgtab
[i
]->msgs
[nl
];
/*
 * sc_xlat: comparison callback for qsort() in add_cpxlat() and bsearch()
 * in find_cpxlat(). p1 and p2 point directly at struct cp_xlat objects;
 * the result is the difference of their lan members (ascending by lan).
 */
660 static int sc_xlat
(const void *p1
, const void *p2
)
662 return
((const struct cp_xlat
*)p1
)->lan
- ((const struct cp_xlat
*)p2
)->lan
;
/*
 * add_cpxlat: add a codepage translation (cpin -> cpout) for language lan.
 *
 * The table is grown by one element, the new entry is written at index
 * ncpxlattab and the table is sorted with sc_xlat(), the same ordering
 * that find_cpxlat() hands to bsearch().
 * NOTE(review): the update of ncpxlattab is not visible in this excerpt;
 * presumably it is incremented before the qsort() call - confirm.
 */
665 static void add_cpxlat
(int lan
, int cpin
, int cpout
)
667 cpxlattab
= xrealloc
(cpxlattab
, (ncpxlattab
+1) * sizeof
(*cpxlattab
));
668 cpxlattab
[ncpxlattab
].lan
= lan
;
669 cpxlattab
[ncpxlattab
].cpin
= cpin
;
670 cpxlattab
[ncpxlattab
].cpout
= cpout
;
672 qsort
(cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);
/*
 * find_cpxlat: look up the codepage translation entry for language lan.
 *
 * Returns NULL while no table has been allocated; otherwise returns the
 * result of bsearch() over cpxlattab with sc_xlat() as comparison, which is
 * NULL when no element matches.
 * NOTE(review): the declaration and initialisation of the search key `t'
 * are not visible in this excerpt; presumably t.lan is set to lan.
 */
675 static struct cp_xlat
*find_cpxlat
(int lan
)
679 if
(!cpxlattab
) return NULL
;
682 return
(struct cp_xlat
*)bsearch
(&t
, cpxlattab
, ncpxlattab
, sizeof
(*cpxlattab
), sc_xlat
);