More new patterns
[aristoteles.git] / aristoteles.pl
blob40a768c2f64eaa4d0efad32509189b071a85724d
1 #!/usr/bin/env perl
3 # aristoteles - a regex-based, just-for-fun Italian-to-Portuguese translator
4 # Copyright (C) 2009 Davide Mancusi <arekfu@gmail.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 use strict;
20 use warnings;
21 use Tie::IxHash;
# Both the translated output and the dictionary embedded after __END__
# contain accented characters, so treat the two handles as UTF-8.
binmode(STDOUT, ":utf8");
binmode(DATA, ":utf8");

# Check the number of arguments: exactly one input file is expected.
die "Usage: ", $0, " <filename>\n" unless @ARGV == 1;

# Read the pattern dictionary.
# Each DATA line is "<search regex><TAB(s)><replacement>". The hash is tied
# to Tie::IxHash so that the rules are later applied in the order in which
# they are listed in the dictionary.
tie my %patdic, 'Tie::IxHash';

while ( my $entry = <DATA> ) {
    chomp $entry;

    # Blank lines carry no rule.
    next if $entry =~ /^\s*$/;

    my ($search, $subst) = split /\t+/, $entry;

    # A line without a tab-separated replacement (or with an empty search
    # part) cannot form a substitution; report it instead of storing undef.
    unless ( defined $search && length $search && defined $subst ) {
        warn "Skipping malformed dictionary entry at DATA line $.: $entry\n";
        next;
    }

    $patdic{$search} = $subst;
}

# Compile every rule exactly once, up front.
# The replacement text may reference capture groups ($1, $2), so the
# substitution has to be built as Perl source and eval'ed. Doing it here,
# rather than once per pattern per input line, also lets us report patterns
# that fail to compile instead of silently ignoring them.
my @rules;
foreach my $search (keys %patdic) {
    my @variants = ( [ $search, $patdic{$search} ] );

    # For patterns of the form "\b<lowercase letter>...", also translate the
    # capitalised spelling, with a capitalised replacement.
    if ( $search =~ /^\\b([[:lower:]].*)/ ) {
        push @variants, [ '\b' . ucfirst($1), ucfirst($patdic{$search}) ];
    }

    foreach my $variant (@variants) {
        my ($pattern, $replacement) = @{$variant};

        # The generated sub edits its first argument in place (via @_ aliasing).
        my $rule = eval 'sub { $_[0] =~ s/' . $pattern . '/' . $replacement . '/g }';
        unless ( defined $rule ) {
            warn "Skipping unusable rule '$pattern' -> '$replacement': $@";
            next;
        }
        push @rules, $rule;
    }
}

# Open file to translate.
# The I/O layer must be introduced by a colon; a mode of "<utf8" is rejected
# by perl with "Unknown open() mode".
open( my $infile, "<:encoding(UTF-8)", $ARGV[0] )
    or die "Can't open input file: $!\n";

# Translate! Every rule is applied to every line, in dictionary order.
while ( my $line = <$infile> ) {
    foreach my $rule (@rules) {
        $rule->($line);
    }
    print $line;
}

close($infile);
55 __END__
56 \bdove\b onde
57 acqua\b agua
58 \blatte\b leite
59 mezzo meio
60 mezza meia
61 \bche\b que
62 \bchi\b quem
63 \bè\b é
64 \be'\b é
65 \bho\b tenho
66 \bhai?\b tem
67 \babbiamo\b tem
68 \bavete\b tem
69 \bhanno\b tem
70 \bcos(ì|i')\b assim
71 \bperch(é|e')\b porque
72 \bbuon(o)?\b bom
73 \bbuona\b boa
74 \bdestra\b direita
75 \bsinistra\b esquerda
76 \btutto\b todo
77 \btutta\b toda
78 \btutti\b todos
79 \btutte\b todas
80 \bquesto\b este
81 \bquesta\b esta
82 \bquel(lo)?\b esse
83 \bquella?\b essa
84 \bil\b o
85 \blo\b o
86 \bl'([[:alpha:]]+)(a|i|u)\b a $1$2
87 \bl'([[:alpha:]]+)(o|e)\b o $1$2
88 \bla\b a
89 \bi\b os
90 \bgli\b os
91 \ble\b as
92 \b(di|a|da|in|con|su|per|fra|tra)\Wme\b $1 mim
93 \b(di|a|da|in|con|su|per|fra|tra)\Wte\b $1 ti
94 \bal(lo)?\b ao
95 \balla\b à
96 \ba(i|gli)\b aos
97 \balle\b às
98 \ball'([[:alpha:]]+)(a|i)\b à $1$2
99 \ball'([[:alpha:]]+)(o|e)\b ao $1$2
100 \bdal(lo)?\b do
101 \bdalla\b da
102 \bda(i|gli)\b dos
103 \bdalle\b das
104 \bda\b de
105 \bdall'([[:alpha:]]+)(a|i)\b da $1$2
106 \bdall'([[:alpha:]]+)(o|e|u)\b do $1$2
107 \bsul(lo)?\b sobre o
108 \bsulla\b sobre a
109 \bsu(i|gli)\b sobre os
110 \bsulle\b sobre as
111 \bsu\b sobre
112 \bdell'([[:alpha:]]+)(a|i)\b da $1$2
113 \bdell'([[:alpha:]]+)(o|e|u)\b do $1$2
114 \bdel(lo)?\b do
115 \bdella\b da
116 \bde(i|gli)\b dos
117 \bdelle\b das
118 \bdi\b de
119 \bdell'([[:alpha:]]+)(a|i)\b da $1$2
120 \bdell'([[:alpha:]]+)(o|e|u)\b do $1$2
121 \bnel(lo)?\b no
122 \bnella\b na
123 \bne(i|gli)\b nos
124 \bnelle\b nas
125 \bnell'([[:alpha:]]+)(a|i)\b à $1$2
126 \bnell'([[:alpha:]]+)(o|e)\b ao $1$2
127 \bcon\b com
128 \bcome\b como
129 \bsotto\b debaixo
130 \bnon?\b não
131 \bun' uma
132 \buno?\b um
133 \buna\b uma
134 \bdue\b dois
135 \btre\b três
136 \bquattro\b quatro
137 \bcinque\b cinco
138 \bsei\b seis
139 \bsette\b sete
140 otto\b oito
141 \bdieci\b dez
142 \bundici\b onze
143 \bdodici\b doze
144 \btredici\b treze
145 \bquattordici\b catorze
146 \bquindici\b quinze
147 \bsedici\b dezasseis
148 \bdiciassette\b dezassete
149 \bdiciotto\b dezoito
150 \bdiciannove\b dezanove
151 \bper\b para
152 \bin\b em
153 \bpi(ù|u')\b mais
154 \bma\b mas
155 ssione\b ção
156 ssioni\b ções
157 sione\b ção
158 sioni\b ções
159 zione\b ção
160 zioni\b ções
161 ine\b inhas
162 one\b ão
163 ini\b inhos
164 oni\b ãos
165 ina\b inha
166 ona\b oa
167 ino\b inho
168 one\b oas
169 ico\b igo
170 (b|c|d|f|g|l|m|n|p|q|t|v|z)\1 $1
171 ([^sl[:^alpha:]])ce\b $1z
172 sc(ia|e|i|io|iu) c$1
173 gli?(a|e|i|o|u) lh$1
174 gni?(a|e|i|o|u) nh$1
175 cchi lh
176 \bpi(a|e|o|u) ch$1
177 \bs(t|p|d|f|c|b) es$1
178 (a|e|i|o)(r|l)e\b $1$2
179 zi(a|e|o|u) ci$1
180 ol(o|a)\b ul$1
181 at(o|a)\b ad$1
182 tà\b tade
183 ta'\b tade
184 (a|e)nza\b $1nça
185 (a|e)nze\b $1nças
186 che\b cas
187 chi\b cos
188 uo\w o
189 ie\w e