Bringing flexcat 2.15 into the main branch (again)
[AROS.git] / tools / flexcat / contrib / cd2po / cd2po.sh
blob1b0ebe6bf77a27aa10a8d99eb387bfe3cad1cc0b
1 #!/bin/bash
3 # a shell script that allows to convert between Amiga-style catalog
4 # description/translation files (.cd/.ct) and gettext-style translation
5 # files (.pot/.po).
7 # Copyright 2013-2014 Jens Maus <mail@jens-maus.de>
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 # $Id$
26 VERSION="1.6"
28 ########################################################
29 # Script starts here
# displayUsage
#
# Prints the version banner and the command-line synopsis of this script.
#
# Globals:   VERSION (read) - version string shown in the banner
# Arguments: none
# Outputs:   seven lines of usage text on stderr; nothing on stdout
# Returns:   exit status of the final printf (0 unless stderr is unwritable)
displayUsage()
{
  # One printf call writes every line; the empty argument yields the blank
  # separator line between the banner and the synopsis.
  printf '%s\n' \
    "cd2po.sh v${VERSION} - convert between Amiga-style and gettext translation files" \
    "Copyright (c) 2013-2014 Jens Maus <mail@jens-maus.de>" \
    "" \
    "Usage: $0 <options> [inputfile (.cd/.ct/.pot/.po)]" \
    "Options:" \
    " -c <charset> : use <charset> when converting the input file" \
    " default: iso-8859-1" >&2
}
43 ################################
44 # AWK scripts #
45 ################################
47 # the following is an awk script that converts an
48 # Amiga-style catalog description file (.cd) to a gettext
49 # PO-style translation template file (.pot).
# cd2pot -- awk program text, stored in the shell variable of the same name,
# that turns an Amiga catalog description (.cd) into a gettext template (.pot).
#
# Quoting: the heredoc delimiter is quoted, so the shell expands nothing in
# the text, but 'read' is called without -r and therefore consumes one level
# of backslashes. Every doubled backslash below reaches awk as a single one.
#
# Processing, as far as the statements below show:
#  - BEGIN fetches the current date through the external 'date' command and
#    prints the "automatically generated" banner.
#  - '#version' / '#language' lines seen before the first tag are remembered.
#  - a line matching ^MSG_ opens a new entry (printing a still open one first)
#    and, for the first entry only, prints the PO header block.
#  - a line consisting of ';' only closes the entry and prints its
#    msgctxt/msgid/msgstr triple; msgstr is always written empty.
#  - ';' followed by text is collected as an extracted comment ('#. ').
#  - any other line inside an entry is message text: it is re-escaped and
#    appended to msgid; an empty first line is written as "<EMPTY>".
#
# NOTE(review): the line starting with '//' is meant as a remark, but awk
# comments start with '#' (the ct2po script uses '#' for the same remark) --
# confirm and correct.
# NOTE(review): no brace-only lines and no terminating EOF line are present
# below -- verify this text against the upstream file before relying on it.
50 read -d '' cd2pot << 'EOF'
51 BEGIN {
52 tagfound=0
53 firsttag=0
54 multiline=0
56 # get current date/time
57 cmd="date +'%Y-%m-%d %H:%M%z'"
58 cmd | getline date
60 print "# Translation catalog description file (pot-style)"
61 print "# $Id$"
62 print "#"
63 print "# WARNING: This file was automatically generated by cd2po.sh"
64 print "#"
67 if(firsttag == 0)
69 if($0 ~ /^#version .*/)
71 version=$2
72 next
74 else if($0 ~ /^#language .*/)
76 language=$2
77 next
81 if($0 ~ /^MSG_.*\(.*\)/)
83 if(tagfound == 1)
85 # this is the end of the current
86 # tag so lets output it in PO-format
87 print ""
88 #print "#: " msgcomment
89 if(length(comment) > 0)
91 print comment
93 print "msgctxt \\"" msgctxt "\\""
94 print "msgid " msgid
95 print "msgstr \\"\\""
98 tagfound=1
100 if(firsttag == 0)
102 print "# version " version
103 print "# language " language
104 print "#"
105 print "#, fuzzy"
106 print "msgid \\"\\""
107 print "msgstr \\"\\""
108 print "\\"Project-Id-Version: " version "\\\\n\\""
109 print "\\"Report-Msgid-Bugs-To: http://URL/\\\\n\\""
110 print "\\"POT-Creation-Date: " date "\\\\n\\""
111 print "\\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\\\n\\""
112 print "\\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\\\n\\""
113 print "\\"Language-Team: LANGUAGE <LL@li.org>\\\\n\\""
114 print "\\"MIME-Version: 1.0\\\\n\\""
115 print "\\"Content-Type: text/plain; charset=UTF-8\\\\n\\""
116 print "\\"Content-Transfer-Encoding: 8bit\\\\n\\""
117 print "\\"Language: " language "\\\\n\\""
119 firsttag=1
122 msgctxt=$0
123 msgcomment=$1
125 # proceed with next word
126 next
128 else if($0 ~ /^;$/)
130 if(tagfound == 1)
132 # this is the end of the current
133 # tag so lets output it in PO-format
134 print ""
135 #print "#: " msgcomment
136 if(length(comment) > 0)
138 print comment
140 print "msgctxt \\"" msgctxt "\\""
141 print "msgid " msgid
142 print "msgstr \\"\\""
145 tagfound=0
146 multiline=0
147 comment=""
149 # proceed with next word
150 next
152 else if($0 ~ /^;.+/)
154 if(length(comment) > 0)
156 comment = comment "\\n"
159 tmp=substr($0, 2)
161 # remove any leading white space
162 gsub(/^ /, "", tmp)
164 # replace \\\\ by \\
165 gsub(/\\\\\\\\/, "\\\\", tmp)
167 comment = comment "#. " tmp
168 multiline=0
170 # proceed with next word
171 next
174 if(tagfound == 1)
176 // remove any backslash at the end of line
177 gsub(/\\\\$/, "")
179 # replace \e with \033
180 gsub(/\\\\\\e/, "\\\\033")
182 # replace plain " with \" but make
183 # sure to check if \" is already there
184 gsub(/\\\\"/, "\\"") # replace \" with "
185 gsub(/"/, "\\\\\\"") # replace " with \"
187 # replace \\\\ by \\
188 gsub(/\\\\\\\\/, "\\\\")
190 # we have to escape the \033 and other escape
191 # sequences
192 gsub(/\\\\0/, "\\\\\\\\\\\\0")
193 gsub(/\\\\33/, "\\\\\\\\\\\\033")
195 if(multiline == 0)
197 # the .po format doesn't allow empty msgid
198 # strings, thus lets escape them with <EMPTY>
199 if(length($0) == 0)
201 msgid="\\"<EMPTY>\\""
203 else
205 msgid="\\"" $0 "\\""
208 multiline=1
210 else
212 msgid=msgid "\\n" "\\"" $0 "\\""
218 # the following is an awk script that converts a
219 # gettext PO-style translation template file (.pot)
220 # to an Amiga-style catalog description file (.cd)
# pot2cd -- awk program text, stored in the shell variable of the same name,
# that turns a gettext template (.pot) back into an Amiga catalog description
# (.cd).
#
# Quoting: 'read' is called without -r, so one level of backslashes is
# consumed before awk sees the text (doubled backslashes become single ones).
#
# Processing, as far as the statements below show:
#  - BEGIN prints the "automatically generated" banner as ';' comment lines.
#  - '# version' / '# language' comment lines seen before the first entry are
#    remembered and re-emitted as '#version' / '#language' ahead of it.
#  - 'msgctxt "MSG_...' opens an entry; its text, with the surrounding quotes
#    stripped, becomes the tag line.
#  - '#. ' lines are collected and written as '; ' comment lines.
#  - the 'msgid' text plus its continuation lines becomes the message text,
#    continuation lines being joined with a backslash followed by a newline;
#    "<EMPTY>" is mapped back to an empty string.
#  - 'msgstr' lines are ignored.
#  - an empty input line, or the END rule, prints the pending entry as
#    tag, text, comments and a closing ';'.
#
# NOTE(review): no brace-only lines and no terminating EOF line are present
# below -- verify this text against the upstream file before relying on it.
221 read -d '' pot2cd << 'EOF'
222 BEGIN {
223 tagfound=0
224 firsttag=0
225 msgidfound=0
226 print "; Catalog description file (Amiga-cd format)"
227 print "; $Id$"
228 print ";"
229 print "; WARNING: This file was automatically generated by cd2po.sh"
230 print ";"
233 if(firsttag == 0)
235 if($0 ~ /^# version .*/)
237 version=$3
238 next
240 else if($0 ~ /^# language .*/)
242 language=$3
243 next
247 if($0 ~ /^msgctxt "MSG_.*/)
249 tagfound=1
250 msgidfound=0
252 if(firsttag == 0)
254 print "#version " version
255 print "#language " language
256 print ";"
258 firsttag=1
261 # extract the tag "MSG_XXXXX (X//)" as tag
262 tag=substr($0, length($1)+2)
264 # strip quotes (") from start&end
265 gsub(/^"/, "", tag)
266 gsub(/"$/, "", tag)
268 else if($0 ~ /^#\. .*/)
270 if(length(comment) > 0)
272 comment = comment "\\n"
275 # replace \\033 with \033
276 gsub(/\\\\\\\\0/, "\\\\0")
277 gsub(/\\\\\\\\33/, "\\\\033")
279 # replace \\ by \\\\
280 gsub(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
282 comment = comment "; " substr($0, length($1)+2)
284 else if(length($0) == 0 && length(tag) != 0)
286 tagfound=0
287 msgidfound=0
289 print tag
290 print msgid
291 if(length(comment) > 0)
293 print comment
295 print ";"
297 tag=""
298 comment=""
301 if(tagfound == 1)
303 if($0 ~ /^msgid ".*/)
305 # get the msgid text only
306 tmp=substr($0, length($1)+2)
308 # strip quotes (") from start&end
309 gsub(/^"/, "", tmp)
310 gsub(/"$/, "", tmp)
312 # replace \\033 with \033
313 gsub(/\\\\\\\\0/, "\\\\0", tmp)
314 gsub(/\\\\\\\\33/, "\\\\033", tmp)
316 # replace \\ by \\\\
317 gsub(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\", tmp)
319 if(length(tmp) > 0)
321 # replace "<EMPTY>" with ""
322 gsub(/<EMPTY>.*/, "", tmp)
323 msgid = tmp
325 else
327 msgid=""
330 msgidfound=1
332 else if($0 ~ /^msgstr ".*/)
334 # ignore msgstr
335 msgidfound=0
337 else if(msgidfound == 1)
339 # strip quotes (") from start&end
340 gsub(/^"/, "")
341 gsub(/"$/, "")
343 # replace \\033 with \033
344 gsub(/\\\\\\\\0/, "\\\\0")
345 gsub(/\\\\\\\\33/, "\\\\033")
347 # replace \\ by \\\\
348 gsub(/\\\\\\\\/, "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
350 if(length(msgid) > 0)
352 msgid = msgid "\\\\\\n" $0
354 else
356 msgid = $0
361 END {
362 if(length(tag) != 0)
364 print tag
365 print msgid
366 if(length(comment) > 0)
368 print comment
370 print ";"
375 # the following is an awk script that converts an
376 # Amiga-style catalog translation file (.ct) to a
377 # gettext PO-style translation file (.po)
# ct2po -- awk program text, stored in the shell variable of the same name,
# that turns an Amiga catalog translation (.ct) into a gettext translation
# (.po).
#
# Quoting: 'read' is called without -r, so one level of backslashes is
# consumed before awk sees the text (doubled backslashes become single ones).
#
# Processing, as far as the statements below show:
#  - BEGIN fetches the current date through the external 'date' command and
#    prints the "automatically generated" banner.
#  - '## version', '## language', '## codeset' and '## chunk AUTH' lines seen
#    before the first entry are remembered and written into the PO header,
#    which is printed together with the first entry.
#  - for every line starting with MSG_ the matching entry (from the tag line
#    up to the closing ';' line) is pulled out of *.cd in the current working
#    directory by running sed; that output supplies msgctxt, the '#. '
#    comments and the msgid text. If sed yields no tag line the entry is
#    dropped.
#  - the lines that follow, up to the next line starting with ';', are
#    re-escaped and concatenated into msgstr.
#  - a line starting with ';' prints the entry; a msgid of two characters or
#    fewer is written as "<EMPTY>", a msgid containing a newline is written
#    in multi-line form.
#
# NOTE(review): the tag name is pasted unescaped into the sed command line
# that awk hands to the shell -- confirm the .ct input is trusted.
# NOTE(review): no brace-only lines and no terminating EOF line are present
# below -- verify this text against the upstream file before relying on it.
378 read -d '' ct2po << 'EOF'
379 BEGIN {
380 tagfound=0
381 firsttag=0
382 multiline=0
384 # get current date/time
385 cmd="date +'%Y-%m-%d %H:%M%z'"
386 cmd | getline date
388 print "# Catalog translation file (po-style)"
389 print "# $Id$"
390 print "#"
391 print "# WARNING: This file was automatically generated by cd2po.sh"
392 print "#"
395 if(firsttag == 0)
397 if($0 ~ /^## version .*/)
399 version=substr($0, length($1)+length($2)+3)
400 next
402 else if($0 ~ /^## language .*/)
404 language=$3
405 next
407 else if($0 ~ /^## codeset .*/)
409 codeset=$3
410 next
412 else if($0 ~ /^## chunk AUTH .*/)
414 auth=substr($0, length($1)+length($2)+length($3)+4)
415 next
419 if($0 ~ /^MSG_.*$/)
421 tagfound=1
422 multiline=0
423 msgctxt=""
424 msgid=""
425 comment=""
427 if(firsttag == 0)
429 print "# version " version
430 print "# language " language
431 print "# codeset " codeset
432 print "# chunk AUTH " auth
433 print "#"
434 print "# Translators:"
435 print "msgid \\"\\""
436 print "msgstr \\"\\""
437 print "\\"Project-Id-Version: " version "\\\\n\\""
438 print "\\"Report-Msgid-Bugs-To: http://URL/\\\\n\\""
439 print "\\"POT-Creation-Date: " date "\\\\n\\""
440 print "\\"PO-Revision-Date: " date "\\\\n\\""
441 print "\\"Last-Translator: " auth "\\\\n\\""
442 print "\\"Language-Team: " language "\\\\n\\""
443 print "\\"MIME-Version: 1.0\\\\n\\""
444 print "\\"Content-Type: text/plain; charset=UTF-8\\\\n\\""
445 print "\\"Content-Transfer-Encoding: 8bit\\\\n\\""
446 print "\\"Language: " language "\\\\n\\""
448 firsttag=1
451 # now we have to search in the CD file for the same string
452 cmd="sed -n '/^" $1 " (/,/^;$/p' *.cd"
453 while((cmd | getline output) > 0)
455 if(output ~ /^MSG_.*$/)
457 msgctxt=output
459 else if(output ~ /^;.+$/)
461 # replace \\\\ by \\
462 gsub(/\\\\\\\\/, "\\\\", output)
464 if(length(comment) > 0)
466 comment = comment "\\n"
469 tmp=substr(output, 2)
470 gsub(/^ /, "", tmp)
471 comment = comment "#. " tmp
473 else if(output ~ /^;$/)
475 # nothing
477 else if(length(msgctxt) > 0)
479 # remove any backslash at the end of line
480 gsub(/\\\\$/, "", output)
482 # replace \e with \033
483 gsub(/\\\\\\e/, "\\\\033", output)
485 # replace plain " with \" but make
486 # sure to check if \" is already there
487 gsub(/\\\\"/, "\\"", output) # replace \" with "
488 gsub(/"/, "\\\\\\"", output) # replace " with \"
490 # replace \\\\ by \\
491 gsub(/\\\\\\\\/, "\\\\", output)
493 # replace \033 with \\033
494 gsub(/\\\\0/, "\\\\\\\\\\\\0", output)
495 gsub(/\\\\33/, "\\\\\\\\\\\\033", output)
497 if(length(msgid) > 0)
499 msgid = msgid "\\n"
502 msgid = msgid "\\"" output "\\""
505 close(cmd)
507 if(length(msgctxt) == 0)
509 tagfound=0
512 next
514 else if($0 ~ /^;.*$/)
516 if(tagfound == 1)
518 # output the stuff
519 print ""
520 if(length(comment) > 0)
522 print comment
524 print "msgctxt \\"" msgctxt "\\""
526 # the .po format doesn't allow empty msgid
527 # strings, thus lets escape them with <EMPTY>
528 if(length(msgid) <= 2)
530 print "msgid \\"<EMPTY>\\""
532 else
534 # find out if this msgid is a multiline msgid or not
535 if(msgid ~ /\\n/)
537 print "msgid \\"\\""
538 print msgid
540 else
542 print "msgid " msgid
546 print "msgstr \\"" msgstr "\\""
548 tagfound=0
549 multiline=0
552 next
555 if(tagfound == 1)
557 # remove any backslash at the end of line
558 gsub(/\\\\$/, "")
560 # replace \e with \033
561 gsub(/\\\\\\e/, "\\\\033")
563 # replace plain " with \" but make
564 # sure to check if \" is already there
565 gsub(/\\\\"/, "\\"") # replace \" with "
566 gsub(/"/, "\\\\\\"") # replace " with \"
568 # replace \033 with \\033
569 gsub(/\\\\0/, "\\\\\\\\\\\\0")
570 gsub(/\\\\33/, "\\\\\\\\\\\\033")
572 if(multiline == 0)
574 msgstr = $0
575 multiline=1
577 else
579 msgstr = msgstr $0
585 # the following is an awk script that converts a
586 # gettext PO-style translation file (.po) to an
587 # Amiga-style catalog translation file (.ct).
# po2ct -- awk program text, stored in the shell variable of the same name,
# that turns a gettext translation (.po) into an Amiga catalog translation
# (.ct).
#
# Inputs: the awk variables 'lang' and 'cset' are printed in the
# '## language' and '## codeset' header lines, so they must be supplied with
# 'awk -v' by the caller.
#
# Quoting: 'read' is called without -r, so one level of backslashes is
# consumed before awk sees the text (doubled backslashes become single ones).
#
# Processing, as far as the statements below show:
#  - before the first entry the version, the revision taken from an $Id$
#    line, the PO-Revision-Date (reformatted to dd.mm.yyyy by running
#    'date -d'), Language and Language-Team header fields are collected;
#    Language-Team ends up in 'auth'.
#  - 'msgctxt "MSG_...' opens an entry and, for the first entry, prints the
#    '## version' / '## language' / '## codeset' / '## chunk AUTH' header.
#  - msgid text (with continuation lines) is kept as '; ' comment lines,
#    msgstr text (with continuation lines) is the translation; "<EMPTY>" is
#    mapped back to an empty string.
#  - an empty input line, or the END rule, prints tag, msgstr, msgid and a
#    closing ';' -- but only when msgstr is not empty.
#
# NOTE(review): 'language' and 'codeset' parsed from the input are never
# printed; the header uses 'lang' and 'cset' instead -- confirm intent.
# NOTE(review): 'date -d' is presumably the GNU date option; confirm the
# target platforms provide it.
# NOTE(review): no brace-only lines and no terminating EOF line are present
# below -- verify this text against the upstream file before relying on it.
588 read -d '' po2ct << 'EOF'
589 BEGIN {
590 tagfound=0
591 firsttag=0
592 msgidfound=0
593 msgstrfound=0
596 if(firsttag == 0)
598 if($0 ~ /^#? version .*/)
600 version=substr($0, length($1)+length($2)+3)
601 next
603 else if($0 ~ /^# \\$Id: .* \\$$/)
605 revision=$4 # get revision out of $Id$ SVN keyword
606 next
608 else if($0 ~ /^## language .*/)
610 language=$3
611 next
613 else if($0 ~ /^## codeset .*/)
615 codeset=$3
616 next
618 else if($0 ~ /^## chunk AUTH .*/)
620 auth=substr($0, length($1)+length($2)+length($3)+4)
621 next
623 else if($0 ~ /^"PO-Revision-Date: .*"/)
625 revdate=substr($0, length($1)+2)
626 gsub(/\\\\n"/, "", revdate);
628 # parse the revision date
629 cmd="date +'%d.%m.%Y' -d \\"" revdate "\\""
630 cmd | getline revdate
632 next
634 else if($0 ~ /^"Language: .*"/)
636 language=substr($0, length($1)+2)
637 gsub(/\\\\n"/, "", language);
638 next
640 else if($0 ~ /^"Language-Team: .*"/)
642 auth=substr($0, length($1)+2)
643 gsub(/\\\\n"/, "", auth);
644 next
648 if($0 ~ /^msgctxt "MSG_.*/)
650 tagfound=1
651 msgidfound=0
652 msgstrfound=0
653 msgid=""
654 msgstr=""
656 if(firsttag == 0)
658 print "## version $VER: XXXX.catalog " version "." revision " (" revdate ")"
659 print "## language " lang
660 print "## codeset " cset
661 print "## chunk AUTH " auth
662 print ";"
663 print "; $Id$"
664 print ";"
666 firsttag=1
669 # strip quotes (") so that we get the plain MSG_XXXX
670 # tag names
671 gsub(/"/, "", $2);
672 tag=$2
674 else if(length($0) == 0 && length(tag) != 0)
676 tagfound=0
677 msgidfound=0
678 msgstrfound=0
680 if(length(msgstr) > 0)
682 print tag
683 print msgstr
684 print msgid
685 print ";"
688 tag=""
691 if(tagfound == 1)
693 if($0 ~ /^msgid ".*/)
695 # get the msgid text only
696 tmp=substr($0, length($1)+2)
698 # strip quotes (") from start&end
699 gsub(/^"/, "", tmp)
700 gsub(/"$/, "", tmp)
702 # replace \\033 with \033
703 gsub(/\\\\\\\\0/, "\\\\0", tmp)
704 gsub(/\\\\\\\\33/, "\\\\033", tmp)
706 if(length(tmp) > 0)
708 if(length(msgid) > 0)
710 msgid = msgid "\\\\\\n; " tmp
712 else
714 msgid = "; " tmp
718 msgstrfound=0
719 msgidfound=1
721 else if($0 ~ /^msgstr ".*/)
723 # get the msgid text only
724 tmp=substr($0, length($1)+2)
726 # strip quotes (") from start&end
727 gsub(/^"/, "", tmp)
728 gsub(/"$/, "", tmp)
730 # replace \\033 with \033
731 gsub(/\\\\\\\\0/, "\\\\0", tmp)
732 gsub(/\\\\\\\\33/, "\\\\033", tmp)
734 if(length(tmp) > 0)
736 # replace "<EMPTY>" with ""
737 gsub(/<EMPTY>.*/, "", tmp)
739 if(length(msgstr) > 0)
741 msgstr = msgstr "\\\\\\n" tmp
743 else
745 msgstr = tmp
749 msgstrfound=1
750 msgidfound=0
752 else if(msgidfound == 1)
754 # strip quotes (") from start&end
755 gsub(/^"/, "")
756 gsub(/"$/, "")
758 # replace \\033 with \033
759 gsub(/\\\\\\\\0/, "\\\\0")
760 gsub(/\\\\\\\\33/, "\\\\033")
762 if(length($0) > 0)
764 if(length(msgid) > 0)
766 msgid = msgid "\\\\\\n; " $0
768 else
770 msgid = "; " $0
774 else if(msgstrfound == 1)
776 # strip quotes (") from start&end
777 gsub(/^"/, "")
778 gsub(/"$/, "")
780 # replace \\033 with \033
781 gsub(/\\\\\\\\0/, "\\\\0")
782 gsub(/\\\\\\\\33/, "\\\\033")
784 if(length($0) > 0)
786 if(length(msgstr) > 0)
788 msgstr = msgstr "\\\\\\n" $0
790 else
792 msgstr = $0
798 END {
799 if(length(tag) != 0 && length(msgstr) > 0)
801 print tag
802 print msgstr
803 print msgid
804 print ";"
809 ###################################################
# identifyCharset <name>
#
# Maps a language name (the caller passes the input file's base name without
# its extension) to the charset name handed to iconv for that language.
#
# Arguments: $1 - language name, e.g. "german"
# Outputs:   the charset name on stdout, followed by a newline
# Returns:   0
#
# Names that are not listed fall back to "iso-8859-1".
identifyCharset()
{
  local file="$1"
  local charset=""

  # Languages sharing a charset are grouped into one pattern list.
  case "${file}" in
    bosnian|czech|slovenian)
      charset="iso-8859-2"
      ;;
    catalan|danish|dutch|finnish|french|german|italian|norwegian|spanish|swedish)
      charset="iso-8859-15"
      ;;
    croatian|hungarian|polish|serbian)
      charset="iso-8859-16"
      ;;
    greek)
      charset="iso-8859-7"
      ;;
    russian)
      # this should be "Amiga-1251" but iconv doesn't support it :(
      charset="windows-1251"
      ;;
    turkish)
      charset="iso-8859-9"
      ;;
    *)
      charset="iso-8859-1"
      ;;
  esac

  # printf with a quoted argument: no word splitting, no globbing.
  printf '%s\n' "${charset}"
}
# identifyCodeset <name>
#
# Maps a language name (the caller passes the input file's base name without
# its extension) to the numeric value written into the "## codeset" line of
# a generated .ct file.
#
# Arguments: $1 - language name, e.g. "german"
# Outputs:   the codeset number on stdout, followed by a newline
# Returns:   0
#
# Names that are not listed fall back to "4".
# NOTE(review): the numbers look like IANA charset MIBenum values matching
# the charsets chosen by identifyCharset -- confirm.
identifyCodeset()
{
  local file="$1"
  local codeset=""

  # Languages sharing a codeset are grouped into one pattern list.
  case "${file}" in
    bosnian|czech|slovenian)
      codeset="5"
      ;;
    catalan|danish|dutch|finnish|french|german|italian|norwegian|spanish|swedish)
      codeset="111"
      ;;
    croatian|hungarian|polish|serbian)
      codeset="112"
      ;;
    greek)
      codeset="10"
      ;;
    persian)
      codeset="0"
      ;;
    russian)
      codeset="2104" # 'Amiga-1251'
      ;;
    turkish)
      codeset="12"
      ;;
    *)
      codeset="4"
      ;;
  esac

  # printf with a quoted argument: no word splitting, no globbing.
  printf '%s\n' "${codeset}"
}
# identifyLanguage <name>
#
# Maps an English language name (the caller passes the input file's base
# name without its extension) to the name written into the "## language"
# line of a generated .ct file.
#
# Arguments: $1 - language name, e.g. "german"
# Outputs:   the mapped name on stdout, followed by a newline
# Returns:   0
#
# Names that are not listed are passed through unchanged.
identifyLanguage()
{
  local file="$1"
  local language=""

  case "${file}" in
    bosnian)           language="bosanski" ;;
    catalan)           language="català" ;;
    croatian)          language="hrvatski" ;;
    danish)            language="dansk" ;;
    dutch)             language="nederlands" ;;
    english-british)   language="english-british" ;;
    finnish)           language="suomi" ;;
    french)            language="français" ;;
    german)            language="deutsch" ;;
    hungarian)         language="magyar" ;;
    italian)           language="italiano" ;;
    japanese)          language="nihongo" ;;
    korean)            language="hangul" ;;
    norwegian)         language="norsk" ;;
    persian)           language="farsi" ;;
    polish)            language="polski" ;;
    portuguese)        language="português" ;;
    portuguese-brazil) language="português-brasil" ;;
    serbian)           language="srpski" ;;
    slovenian)         language="slovensko" ;;
    spanish)           language="español" ;;
    swedish)           language="svenska" ;;
    turkish)           language="türkçe" ;;
    *)                 language="${file}" ;;
  esac

  # The fallback echoes caller-supplied text, so it must be printed quoted:
  # an unquoted echo would collapse whitespace and expand glob characters.
  printf '%s\n' "${language}"
}
1045 ###################################################
# Main program: parse the options, pick the conversion from the input file's
# extension and run the matching awk program, with iconv translating between
# the catalog's charset and UTF-8.
charset=""

# parse the command-line options
while getopts "c:" opt
do
  case "$opt" in
    c) charset="$OPTARG";;
    \?) # unknown flag
      displayUsage
      exit 2;;
  esac
done
shift $((OPTIND - 1))

# The input file is the first operand left once the options are shifted
# away. Reading "$1" before getopts would pick up "-c" itself whenever a
# charset is given.
inputfile="$1"

if [ -z "${inputfile}" ]; then
  displayUsage
  exit 2
fi

# lets identify by the file extension which operation to perform
fname=$(basename "${inputfile}")
filename="${fname%.*}"
extension="${fname##*.}"
case "${extension}" in
  cd) # convert from cd -> pot
    if [ -z "${charset}" ]; then
      charset="iso-8859-1"
    fi
    iconv -c -f "${charset}" -t utf8 "${inputfile}" | awk "${cd2pot}"
    ;;
  ct) # convert from ct -> po
    if [ -z "${charset}" ]; then
      charset=$(identifyCharset "${filename}")
    fi
    iconv -c -f "${charset}" -t utf8 "${inputfile}" | awk "${ct2po}"
    ;;
  po) # convert from po -> ct
    if [ -z "${charset}" ]; then
      charset=$(identifyCharset "${filename}")
    fi
    # The codeset is always derived from the file name; leaving it unset when
    # -c is given would produce an empty "## codeset" line.
    # NOTE(review): with -c the derived codeset may not correspond to the
    # charset the user asked for -- confirm the desired behaviour.
    codeset=$(identifyCodeset "${filename}")
    lang=$(identifyLanguage "${filename}")
    awk -v lang="${lang}" -v cset="${codeset}" "${po2ct}" "${inputfile}" | iconv -c -f utf8 -t "${charset}"
    ;;
  pot) # convert from pot -> cd
    if [ -z "${charset}" ]; then
      charset="iso-8859-1"
    fi
    awk "${pot2cd}" "${inputfile}" | iconv -c -f utf8 -t "${charset}"
    ;;
  *) # anything else cannot be converted; say so instead of exiting quietly
    echo >&2 "$0: unsupported input file '${inputfile}' (expected .cd/.ct/.pot/.po)"
    displayUsage
    exit 2
    ;;
esac

exit 0