use the new flexcat's source description file
[AROS.git] / tools / flexcat / src / scanpo.c
blob484babd277dcb58a533ab197734ce52b9596b3b5
1 /*
2 * $Id$
4 * Copyright (C) 1993-1999 Jochen Wiedmann and Marcin Orlowski
5 * Copyright (C) 2002-2014 FlexCat Open Source Team
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at
10 * your option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #ifdef AMIGA
24 #include <proto/locale.h> /* This is to get locale.library/IsAlpha() */
25 #endif
27 #include <errno.h>
28 #include <limits.h>
29 #include <time.h>
31 #include "flexcat.h"
32 #include "showfuncs.h"
33 #include "readprefs.h"
34 #include "globals.h"
35 #include "utils.h"
36 #include "createcat.h"
/* Globals defined elsewhere in FlexCat that ScanPOFile() reads or fills in. */
extern char *CatVersionString; /* "$VER: ..." string; built from the PO header if still NULL */
extern char *CatLanguage;      /* set from the "Language:" header via AddCatalogChunk() */
extern char *CatRcsId;         /* only read here: accepted (with CatName) as a version source */
extern char *CatName;          /* only read here: accepted (with CatRcsId) as a version source */
extern int CodeSet;            /* numeric codeset derived from the destination charset */
extern int CT_Scanned;         /* set to TRUE when the scan finished without errors */
/* Non-zero when c is an ASCII letter or decimal digit (locale independent).
   The argument is evaluated more than once, so it must be free of side effects. */
#define IS_NUMBER_OR_LETTER(c) \
  (((c) >= 'A' && (c) <= 'Z') || \
   ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9'))
/* Explicit prototype for strptime(), which ScanPOFile() uses to parse the
   "PO-Revision-Date:" header.
   NOTE(review): presumably needed because <time.h> on these targets does not
   declare it (or only does so under feature-test macros) - confirm. */
#if defined(__amigaos3__) || defined(__MORPHOS__) || defined(WIN32) || defined(unix)
char *strptime(const char *string, const char *fmt, struct tm *res);
#endif
53 /// ScanCTFile
55 /* This function scans a PO-style format catalog description/translation file.
57 Inputs: pofile - name of the description/translation file to scan.
58 Result: TRUE if successful, FALSE otherwise.
60 int ScanPOFile(char *pofile)
62 FILE *fp;
63 char *newline, *line;
64 int Result = TRUE;
65 int CodeSet_checked = FALSE;
66 int revision_found = FALSE;
67 int inHeader = TRUE;
68 int NextID = 0;
69 const char *PoSrcCharset = "utf-8";
70 const char *CatDstCharset = "iso-8859-1";
71 char CatVersionDate[255] = "";
72 char CatProjectName[255] = "";
73 struct CatString *cs = NULL;
74 struct CatString **csptr = &FirstCatString;
75 int inMsgID = FALSE;
76 int inMsgSTR = FALSE;
78 ScanFile = pofile;
79 ScanLine = 0;
80 if((fp = fopen(pofile, "r")) == NULL)
81 ShowErrorQuick(MSG_ERR_NOCATALOGTRANSLATION, pofile);
83 if(!NoBufferedIO)
84 setvbuf(fp, NULL, _IOFBF, buffer_size);
86 while(!feof(fp) && (line = newline = ReadLine(fp, TRUE)) != NULL)
88 if(inHeader == TRUE)
90 if(*line == '\0')
92 inHeader = FALSE;
94 // we found the end of the header so lets check if we have all
95 // we require to continue
96 if(CatVersion > 0 && CatVersionDate[0] != '\0' && CatProjectName[0] != '\0' &&
97 CatVersionString == NULL)
99 char buf[255];
101 // warn about missing revision information
102 if(CatRevision == 0)
103 ShowWarn(MSG_ERR_NO_CAT_REVISION);
105 if(strstr(CatProjectName, ".catalog") != NULL)
106 snprintf(buf, sizeof(buf), "$VER: %s %d.%d (%s)", CatProjectName, CatVersion, CatRevision, CatVersionDate);
107 else
108 snprintf(buf, sizeof(buf), "$VER: %s.catalog %d.%d (%s)", CatProjectName, CatVersion, CatRevision, CatVersionDate);
109 CatVersionString = AllocString(buf);
112 else switch(*line)
114 case '#':
116 // comment lines start with #
117 // but they may contain some valueable information for catalog
118 // file creation. So lets parse these lines as well
119 while(*line == '#' || *line == ' ' || *line == '\t')
120 ++line;
122 if(Strnicmp(line, "version", 7) == 0)
124 line += 8;
125 OverSpace(&line);
126 CatVersion = strtol(line, &line, 0);
128 else if(Strnicmp(line, "revision", 8) == 0)
130 line += 9;
131 OverSpace(&line);
132 CatRevision = strtol(line, &line, 0);
133 revision_found = TRUE;
135 else if(revision_found == FALSE &&
136 Strnicmp(line, "$Id: ", 5) == 0)
138 char *p;
140 line += 6;
141 p = line;
143 // search second space
144 p = strchr(p, ' ');
145 if(p != NULL)
147 p++;
148 CatRevision = strtol(p, &p, 0);
151 else if(revision_found == FALSE &&
152 Strnicmp(line, "$Revision: ", 11) == 0)
154 line += 12;
155 CatRevision = strtol(line, &line, 0);
158 break;
160 case '"':
162 if(Strnicmp(line, "\"Language: ", 11) == 0)
164 char *p;
165 const char *language = NULL;
167 if(CatLanguage)
168 ShowError(MSG_ERR_DOUBLECTLANGUAGE);
170 line += 11;
171 p = strchr(line, '\\');
172 if(p != NULL)
173 *p = '\0';
175 if(Stricmp(line, "bs") == 0) // bosnian
177 language = "bosanski";
178 CatDstCharset = "iso-8859-2";
180 else if(Stricmp(line, "ca") == 0) // catalan
182 language = "catalĂ ";
183 CatDstCharset = "iso-8859-15";
185 else if(Stricmp(line, "hr") == 0) // croatian
187 language = "hrvatski";
188 CatDstCharset = "iso-8859-16";
190 else if(Stricmp(line, "cs") == 0) // czech
192 language = "czech";
193 CatDstCharset = "iso-8859-2";
195 else if(Stricmp(line, "da") == 0) // danish
197 language = "dansk";
198 CatDstCharset = "iso-8859-15";
200 else if(Stricmp(line, "nl") == 0) // dutch
202 language = "nederlands";
203 CatDstCharset = "iso-8859-15";
205 else if(Stricmp(line, "en_GB") == 0) // english-british
206 language = "english-british";
207 else if(Stricmp(line, "fi") == 0) // finnish
209 language = "suomi";
210 CatDstCharset = "iso-8859-15";
212 else if(Stricmp(line, "fr") == 0) // french
214 language = "français";
215 CatDstCharset = "iso-8859-15";
217 else if(Stricmp(line, "de") == 0) // german
219 language = "deutsch";
220 CatDstCharset = "iso-8859-15";
222 else if(Stricmp(line, "el") == 0) // greek
224 language = "greek";
225 CatDstCharset = "iso-8859-7";
227 else if(Stricmp(line, "hu") == 0) // hungarian
229 language = "magyar";
230 CatDstCharset = "iso-8859-16";
232 else if(Stricmp(line, "it") == 0) // italian
234 language = "italiano";
235 CatDstCharset = "iso-8859-15";
237 else if(Stricmp(line, "ja") == 0) // japanese
239 language = "nihongo";
240 CatDstCharset = "euc-jp";
242 else if(Stricmp(line, "ko") == 0) // korean
244 language = "hangul";
245 CatDstCharset = "euc-kr";
247 else if(Stricmp(line, "no") == 0) // norwegian
249 language = "norsk";
250 CatDstCharset = "iso-8859-15";
252 else if(Stricmp(line, "fa") == 0) // persian
254 language = "farsi";
255 CatDstCharset = "utf-8";
257 else if(Stricmp(line, "pl") == 0) // polish
259 language = "polski";
260 CatDstCharset = "iso-8859-16";
262 else if(Stricmp(line, "pt") == 0) // portuguese
263 language = "portuguĂŞs";
264 else if(Stricmp(line, "pt_BR") == 0) // portuguese-brazil
265 language = "portuguĂŞs-brasil";
266 else if(Stricmp(line, "ru") == 0) // russian
268 language = "russian";
269 #if defined(AMIGA)
270 CatDstCharset = "Amiga-1251";
271 #else
272 CatDstCharset = "windows-1251"; // iconv doesn't know anything about Amiga-1251 :(
273 #endif
275 else if(Stricmp(line, "sr") == 0) // serbian
277 language = "srpski";
278 CatDstCharset = "iso-8859-16";
280 else if(Stricmp(line, "sl") == 0) // slovenian
282 language = "slovensko";
283 CatDstCharset = "iso-8859-2";
285 else if(Stricmp(line, "es") == 0) // spanish
287 language = "español";
288 CatDstCharset = "iso-8859-15";
290 else if(Stricmp(line, "sv") == 0) // swedish
292 language = "svenska";
293 CatDstCharset = "iso-8859-15";
295 else if(Stricmp(line, "tr") == 0) // turkish
297 language = "türkçe";
298 CatDstCharset = "iso-8859-9";
301 if(language != NULL)
302 CatLanguage = AddCatalogChunk(strdup("LANG"), language);
304 else if(Strnicmp(line, "\"Language-Team: ", 16) == 0)
306 char *p;
308 line += 16;
309 p = strchr(line, '\\');
310 if(p != NULL)
311 *p = '\0';
313 AddCatalogChunk(strdup("AUTH"), line);
315 else if(CodeSet_checked == FALSE &&
316 Strnicmp(line, "\"Content-Type: ", 15) == 0)
318 char *p;
320 line += 16;
321 p = strstr(line, "charset=");
322 if(p != NULL)
324 char *q;
326 p += 8;
328 q = strchr(p, '\\');
329 if(q != NULL)
330 *q = '\0';
332 PoSrcCharset = strdup(p);
335 CodeSet_checked = TRUE;
337 else if(Strnicmp(line, "\"PO-Revision-Date: ", 19) == 0)
339 struct tm tm;
341 line += 19;
342 memset(&tm, 0, sizeof(tm));
343 strptime(line, "%Y-%m-%d", &tm);
344 strftime(CatVersionDate, sizeof(CatVersionDate), "%d.%m.%Y", &tm);
346 else if(Strnicmp(line, "\"Catalog-Name: ", 15) == 0)
348 char *p;
350 line += 15;
351 p = strchr(line, '\\');
352 if(p != NULL)
353 *p = '\0';
355 strcpy(CatProjectName, line);
357 else if(Strnicmp(line, "\"Project-Id-Version: ", 21) == 0 && CatProjectName[0] == '\0')
359 // fall back to the project ID as catalog name if it is not yet defined
360 char *p;
362 line += 21;
363 p = strchr(line, '\\');
364 if(p != NULL)
365 *p = '\0';
367 strcpy(CatProjectName, line);
370 break;
373 else
375 // check if we found a line starting with "msgctxt" as that signals us
376 // a new catalog string should be added
377 if(Strnicmp(line, "msgctxt \"", 9) == 0)
379 char *idstr;
381 // we found a new 'msgctxt' lets clear cs
382 cs = NULL;
383 inMsgID = FALSE;
384 inMsgSTR = FALSE;
386 line += 9;
388 /* Check for blanks at the start of line. */
389 if(*line == ' ' || *line == '\t')
391 ShowError(MSG_ERR_UNEXPECTEDBLANKS);
392 OverSpace(&line);
395 idstr = line;
396 while(IS_NUMBER_OR_LETTER(*line) || *line == '_')
397 ++line;
399 if(idstr == line)
401 ShowError(MSG_ERR_NOIDENTIFIER);
402 Result = FALSE;
404 else
406 int found;
408 if((cs = malloc(sizeof(*cs))) == NULL)
409 MemError();
411 // search for the next catstring ID in case the ID
412 // specifier is missing "(//)" in the msgctxt
415 struct CatString *scs;
417 found = TRUE;
418 for(scs = FirstCatString; scs != NULL; scs = scs->Next)
420 if(scs->ID == NextID)
422 found = FALSE;
423 ++NextID;
424 break;
428 while(found == FALSE);
430 cs->Next = NULL;
431 cs->ID = NextID;
432 cs->MinLen = 0;
433 cs->MaxLen = -1;
434 cs->CD_Str = (char *)"";
435 cs->CT_Str = NULL;
436 cs->NotInCT = TRUE;
437 cs->POformat = TRUE;
439 if((cs->ID_Str = malloc((line - idstr) + 1)) == NULL)
440 MemError();
442 strncpy(cs->ID_Str, idstr, line - idstr);
443 cs->ID_Str[line - idstr] = '\0';
444 OverSpace(&line);
446 /* Check if next char in line is '('? (//) */
447 if(*line != '(')
449 ShowError(MSG_ERR_NO_LEADING_BRACKET, cs->ID_Str);
450 Result = FALSE;
452 else
454 struct CatString *scs;
456 ++line;
457 OverSpace(&line);
459 /* Check for default config of line (//) */
460 if(*line != '/')
462 if(*line == '+')
463 NextID = cs->ID = NextID + strtol(line, &line, 0);
464 else if(*line == '$')
466 line++;
467 cs->ID = NextID = strtol(line, &line, 16);
469 else
470 cs->ID = NextID = strtol(line, &line, 0);
472 OverSpace(&line);
475 /* Check for already used identifier. */
476 for(scs = FirstCatString; scs != NULL; scs = scs->Next)
478 if(scs->ID == cs->ID)
480 ShowError(MSG_ERR_DOUBLE_ID, cs->ID_Str);
481 Result = FALSE;
483 if(strcmp(cs->ID_Str, scs->ID_Str) == 0)
485 ShowError(MSG_ERR_DOUBLE_IDENTIFIER, cs->ID_Str);
486 Result = FALSE;
490 /* Check for min/len values (//) */
491 if(*line != '/')
493 ShowWarn(MSG_ERR_NO_MIN_LEN, cs->ID_Str);
494 Result = FALSE;
496 else
498 ++line;
499 OverSpace(&line);
500 if(*line != '/')
502 cs->MinLen = strtol(line, &line, 0);
503 OverSpace(&line);
505 if(*line != '/')
507 ShowWarn(MSG_ERR_NO_MAX_LEN, cs->ID_Str);
508 Result = FALSE;
510 else
512 ++line;
513 OverSpace(&line);
514 if(*line != ')')
516 cs->MaxLen = strtol(line, &line, 0);
517 OverSpace(&line);
519 if(*line != ')')
521 ShowError(MSG_ERR_NO_TRAILING_BRACKET, cs->ID_Str);
522 Result = FALSE;
524 else
526 ++line;
527 OverSpace(&line);
528 if(*line && *line != '\"')
529 ShowError(MSG_ERR_EXTRA_CHARACTERS_ID, cs->ID_Str);
535 //printf("ID_Str: '%s' (%d)\n", cs->ID_Str, cs->ID);
537 cs->Nr = NumStrings;
538 cs->LenBytes = 0;
539 *csptr = cs;
540 csptr = &cs->Next;
541 ++NumStrings;
544 else if(cs != NULL)
546 char *p;
548 // if the user want to force a certain output (destination)
549 // codeset we set it here.
550 if(DestCodeset[0] != '\0')
551 CatDstCharset = DestCodeset;
553 // Make sure double backslashes end up in a single backslash.
554 // We catch any double backslash followed by a zero character,
555 // which covers strings like "\\0" and "\\033" or "\\33" as these are
556 // common strings in MUI applications.
557 while((p = strstr(line, "\\\\0")) != NULL || (p = strstr(line, "\\\\33")) != NULL)
558 memmove(p, p+1, strlen(p));
560 // unquote the string
561 if(line[strlen(line)-1] == '"')
562 line[strlen(line)-1] = '\0';
564 if(Strnicmp(line, "msgid \"", 7) == 0)
566 line += 7;
568 // if the string starts with <EMPTY> we out to remove
569 // the rest of the string!
570 if(strncmp(line, "<EMPTY>", 7) == 0)
571 *line = '\0';
573 if(strlen(line) > 0)
574 cs->CD_Str = ConvertString(line, PoSrcCharset, CatDstCharset);
575 else
577 cs->CD_Str = malloc(1);
578 cs->CD_Str[0] = '\0';
581 //printf("CD_Str: '%s' '%s'\n", cs->CD_Str, line);
583 inMsgID = TRUE;
584 inMsgSTR = FALSE;
586 else if(Strnicmp(line, "msgstr \"", 8) == 0)
588 line += 8;
590 if(strlen(line) > 0)
591 cs->CT_Str = ConvertString(line, PoSrcCharset, CatDstCharset);
592 else
594 cs->CT_Str = malloc(1);
595 cs->CT_Str[0] = '\0';
598 cs->NotInCT = FALSE;
600 //printf("CT_Str: '%s'\n", cs->CT_Str);
602 inMsgSTR = TRUE;
603 inMsgID = FALSE;
605 else if(*line == '"') // line starts with "
607 line++;
609 if(inMsgID == TRUE)
611 char *t = ConvertString(line, PoSrcCharset, CatDstCharset);
613 cs->CD_Str = AddString(cs->CD_Str, t);
615 //printf("CD_Str2: '%s' '%s'\n", cs->CD_Str, line);
617 free(t);
619 else if(inMsgSTR == TRUE)
621 char *t = ConvertString(line, PoSrcCharset, CatDstCharset);
623 cs->CT_Str = AddString(cs->CT_Str, t);
625 //printf("CT_Str2: '%s' '%s'\n", cs->CT_Str, line);
627 free(t);
635 printf("CatVersion: %d.%d\n", CatVersion, CatRevision);
636 printf("CatVersionDate: '%s'\n", CatVersionDate);
637 printf("CatVersionString: '%s'\n", CatVersionString);
638 printf("CatLanguage: '%s'\n", CatLanguage);
639 printf("PoSrcCharset: '%s'\n", PoSrcCharset);
640 printf("CatDstCharset: '%s'\n", CatDstCharset);
643 if(!CodeSet_checked)
644 ShowErrorQuick(MSG_ERR_NOCTCODESET);
646 if(!(CatVersionString || (CatRcsId && CatName)))
647 ShowErrorQuick(MSG_ERR_NOCTVERSION);
649 // lets translate CatDstCharset to CodeSet number
650 if(Stricmp(CatDstCharset, "iso-8859-1") == 0)
651 CodeSet = 4;
652 else if(Stricmp(CatDstCharset, "iso-8859-2") == 0)
653 CodeSet = 5;
654 else if(Stricmp(CatDstCharset, "iso-8859-7") == 0)
655 CodeSet = 10;
656 else if(Stricmp(CatDstCharset, "iso-8859-9") == 0)
657 CodeSet = 12;
658 else if(Stricmp(CatDstCharset, "utf-8") == 0 || Stricmp(CatDstCharset, "utf8") == 0)
659 CodeSet = 106;
660 else if(Stricmp(CatDstCharset, "iso-8859-15") == 0)
661 CodeSet = 111;
662 else if(Stricmp(CatDstCharset, "iso-8859-16") == 0)
663 CodeSet = 112;
664 else if(Stricmp(CatDstCharset, "amiga-1251") == 0 || Stricmp(CatDstCharset, "windows-1251"))
665 CodeSet = 2104;
666 else
667 CodeSet = 0;
669 // check consistenty of translations found
670 for(cs = FirstCatString; cs != NULL; cs = cs->Next)
672 if(cs->CT_Str == NULL)
673 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION, cs->ID_Str);
674 else
676 size_t reallen;
677 size_t cd_len;
679 /* Get string length */
680 reallen = strlen(cs->CT_Str);
681 cd_len = strlen(cs->CD_Str);
683 // check for empty translations
684 if(cd_len > 0)
686 if(reallen == 0)
688 // for .po files empty strings are really missing translations
689 ShowWarnQuick(MSG_ERR_MISSINGTRANSLATION, cs->ID_Str);
691 // now remove the cs from the list
692 cs->NotInCT = TRUE;
693 continue;
695 else if(strcmp(cs->CT_Str, "<EMPTY>") == 0)
697 // string should be intentionally empty
698 cs->CT_Str[0] = '\0';
702 if(cs->MinLen > 0 && reallen < (size_t)cs->MinLen)
703 ShowWarnQuick(MSG_ERR_STRING_TOO_SHORT, cs->ID_Str);
705 if(cs->MaxLen > 0 && reallen > (size_t)cs->MaxLen)
706 ShowWarnQuick(MSG_ERR_STRING_TOO_LONG, cs->ID_Str);
708 /* Check for trailing ellipsis. */
709 if(reallen >= 3 && cd_len >= 3)
711 if(strcmp(&cs->CD_Str[cd_len - 3], "...") == 0 &&
712 strcmp(&cs->CT_Str[reallen - 3], "...") != 0)
714 ShowWarnQuick(MSG_ERR_TRAILING_ELLIPSIS, cs->ID_Str);
717 if(strcmp(&cs->CD_Str[cd_len - 3], "...") != 0 &&
718 strcmp(&cs->CT_Str[reallen - 3], "...") == 0)
720 ShowWarnQuick(MSG_ERR_NO_TRAILING_ELLIPSIS, cs->ID_Str);
724 /* Check for trailing spaces. */
725 if(reallen >= 1 && cd_len >= 1)
727 if(strcmp(&cs->CD_Str[cd_len - 1], " ") == 0 &&
728 strcmp(&cs->CT_Str[reallen - 1], " ") != 0)
731 ShowWarnQuick(MSG_ERR_TRAILING_BLANKS, cs->ID_Str);
734 if(strcmp(&cs->CD_Str[cd_len - 1], " ") != 0 &&
735 strcmp(&cs->CT_Str[reallen - 1], " ") == 0)
738 ShowWarnQuick(MSG_ERR_NO_TRAILING_BLANKS, cs->ID_Str);
742 /* Check for matching placeholders */
743 if(reallen >= 1 && cd_len >= 1)
745 char *cdP = cs->CD_Str;
746 char *ctP = cs->CT_Str;
750 cdP = strchr(cdP, '%');
751 ctP = strchr(ctP, '%');
753 if(cdP == NULL && ctP == NULL)
755 // no more placeholders, bail out
756 break;
758 else if(cdP != NULL && ctP != NULL)
760 // skip the '%' sign
761 cdP++;
762 ctP++;
764 // check the placeholder only if the '%' is followed by an
765 // alpha-numerical character or another percent sign
766 if(IS_NUMBER_OR_LETTER(*cdP) || *cdP == '%')
768 if(*cdP != *ctP)
770 ShowWarnQuick(MSG_ERR_MISMATCHING_PLACEHOLDERS, cs->ID_Str);
772 break;
775 // skip the second '%' sign
776 if(*cdP == '%')
777 cdP++;
778 if(*ctP == '%')
779 ctP++;
781 else if(IS_NUMBER_OR_LETTER(*ctP) || *ctP == '%')
783 // the translation uses a placeholder while the description
784 // uses none.
785 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS, cs->ID_Str);
787 break;
790 else if(cdP != NULL && ctP == NULL)
792 // skip the '%' sign
793 cdP++;
795 // check if really a placeholder follows or just another percent sign
796 // the original string is allowed to contain more single percent signs than the translated string
797 if(IS_NUMBER_OR_LETTER(*cdP) || *cdP == '%')
799 // the description uses at least one more placeholder than the translation
800 ShowWarnQuick(MSG_ERR_MISSING_PLACEHOLDERS, cs->ID_Str);
803 break;
805 else if(cdP == NULL && ctP != NULL)
807 // skip the '%' sign
808 ctP++;
810 // check if really a placeholder follows or just another percent sign
811 // the translated string is allowed to contain more single percent signs than the original string
812 if(IS_NUMBER_OR_LETTER(*ctP) || *ctP == '%')
814 // the translation uses at least one more placeholder than the description
815 ShowWarnQuick(MSG_ERR_EXCESSIVE_PLACEHOLDERS, cs->ID_Str);
818 break;
821 while(TRUE);
826 if(line != NULL)
827 free(line);
829 fclose(fp);
831 if(WarnCTGaps)
833 for(cs = FirstCatString; cs != NULL; cs = cs->Next)
835 if(cs->CT_Str == NULL)
837 ShowWarn(MSG_ERR_CTGAP, cs->ID_Str);
842 if(Result)
843 CT_Scanned = TRUE;
845 return(Result);