Adding
[apertium.git] / apertium-eval-translator / eval-translator.pl
blob5b27f01932115dd435fff4507346214a98ce3395
2 # (c) 2006 Felipe Sánchez Martínez
3 # (c) 2006 Universitat d'Alacant
5 # This software calculates the word error rate (WER) and the
6 # position-independent word error rate (PER) between the translation
7 # performed by the apertium MT system and a reference translation
8 # obtained by post-editing the system output.
9 #
10 # The edit_distance procedure used in this script is based on
11 # the Levenshtein distance implementation by Jorge Mas Trullenque
12 # that can be found in http://www.merriampark.com/ldperl2.htm
14 # This software is licensed under the GPL license version 2, or at
15 # your option any later version
18 use strict;
19 use warnings;
21 # Getting command line arguments:
22 use Getopt::Long;
23 # Documentation:
24 use Pod::Usage;
25 # I/O Handler
26 use IO::Handle;
# Values supplied on the command line.
my ($test, $ref, $help, $beam, $version);

# Word counts gathered while reading the two corpora.
my ($nunknown, $ntest, $nref);

# Raw (preprocessed) text of each corpus, kept so it can be re-split later.
my ($test_corpus, $ref_corpus);

# Command line arguments: option specification -> destination variable.
my %option_spec = (
    'test|t=s'  => \$test,
    'ref|r=s'   => \$ref,
    'beam|b=n'  => \$beam,
    'help|h'    => \$help,
    'version|v' => \$version,
);
GetOptions(%option_spec) or pod2usage(2);

if ($version) {
    print "apertium-eval-translator 1.0.0\n";
    exit 0;
}

# Show the usage message when help is requested or when either of the
# two mandatory file arguments is missing.
pod2usage(2) if $help || !$test || !$ref;

# A missing, zero or negative beam width means "no beam search".
$beam = 0 if !$beam || $beam < 0;
# Reads a whole file into a string.
#   $path  - file to read
#   $label - word used in the error message ("test" or "reference")
# Dies with the script's usual error message when the file cannot be
# opened. Uses a lexical handle and three-argument open so that special
# characters in the file name are never interpreted as an open mode, and
# localizes $/ so slurp mode does not leak into the rest of the program.
my $slurp = sub {
    my ($path, $label) = @_;
    open(my $fh, '<', $path)
        or die "Error: Cannot open $label file \'$path\': $!\n";
    local $/;    # read the whole file in one step
    my $text = <$fh>;
    close($fh);
    return defined $text ? $text : '';
};

# Formats $part/$whole as a percentage with two decimals. A zero
# denominator yields "0.00" instead of aborting the whole report with
# "Illegal division by zero" (e.g. a test file without unknown words).
my $percent = sub {
    my ($part, $whole) = @_;
    return sprintf("%.2f", $whole ? ($part / $whole) * 100 : 0);
};

# Test corpus: keep the starred text for the second evaluation pass,
# then strip the stars and count how many were removed.
$_ = $slurp->($test, 'test');
preprocess();
$test_corpus = $_;
# s///g returns an empty string when nothing was replaced; force it to 0
# so the count prints as a number.
$nunknown = (s/[*](\w+)/$1/g) || 0;
# split ' ' skips leading whitespace, so a corpus that starts with a
# blank or newline does not gain a phantom empty first word.
my @words_test = split ' ', $_;
$ntest = @words_test;

# Reference corpus.
$_ = $slurp->($ref, 'reference');
preprocess();
$ref_corpus = $_;
my @words_ref = split ' ', $_;
$nref = @words_ref;

print "Test file: '$test'\n";
print "Reference file '$ref'\n\n";

print "Statistics about input files\n";
print "-------------------------------------------------------\n";
print "Number of words in reference: $nref\n";
print "Number of words in test: $ntest\n";
print "Number of unknown words (marked with a star) in test: $nunknown\n";
print "Percentage of unknown words: ", $percent->($nunknown, $ntest), " %\n";
print "\n";

# First pass: @words_test holds the test words with the stars removed.
my $distance_nounk = edit_distance();
print "Results when removing unknown-word marks (stars)\n";
print "-------------------------------------------------------\n";
print "Edit distance: $distance_nounk\n";
print "Word error rate (WER): ", $percent->($distance_nounk, $ntest), " %\n";
my $per_nounk = position_independent_errors();
print "Number of position-independent word errors: ", $per_nounk, "\n";
print "Position-independent word error rate (PER): ", $percent->($per_nounk, $ntest), " %\n";

print "\n";

# Second pass: re-split the saved corpora so the test words keep their
# stars. Both rates are still divided by $ntest.
@words_test = split ' ', $test_corpus;
@words_ref  = split ' ', $ref_corpus;

my $distance = edit_distance();
print "Results when unknown-word marks (stars) are not removed\n";
print "-------------------------------------------------------\n";
print "Edit distance: $distance\n";
print "Word Error Rate (WER): ", $percent->($distance, $ntest), " %\n";
my $per = position_independent_errors();
print "Number of position-independent word errors: ", $per, "\n";
print "Position-independent word error rate (PER): ", $percent->($per, $ntest), " %\n";

print "\n";

# The free-ride count is the difference between the edit distance with
# stars kept and the edit distance with stars removed.
my $free_rides = $distance - $distance_nounk;
print "Statistics about the translation of unknown words\n";
print "-------------------------------------------------------\n";
print "Number of unknown words which were free rides: ", $free_rides, "\n";
print "Percentage of unknown words that were free rides: ",
    $percent->($free_rides, $nunknown), " %\n";
# Counts the position-independent word errors between the file-level
# @words_test and @words_ref arrays.
#
# Words that have no matching counterparts are counted as substitution
# errors. Depending on which corpus, test or reference, is longer the
# remaining words result in either insertion or deletion errors.
#
# Returns half of (difference in length + sum over every distinct word of
# the absolute difference between its two occurrence counts).
sub position_independent_errors {
    # Occurrence count of each distinct word in each corpus.
    my (%test_count, %ref_count);
    $test_count{$_}++ for @words_test;
    $ref_count{$_}++  for @words_ref;

    # Visit every distinct word once, whichever corpus it comes from.
    my %visited;
    my $mismatch = 0;
    for my $word (keys %test_count, keys %ref_count) {
        next if $visited{$word}++;
        my $in_test = exists $test_count{$word} ? $test_count{$word} : 0;
        my $in_ref  = exists $ref_count{$word}  ? $ref_count{$word}  : 0;
        $mismatch += abs($in_test - $in_ref);
    }

    my $length_gap = abs(scalar(@words_test) - scalar(@words_ref));
    return ($length_gap + $mismatch) / 2;
}
# Word-level edit distance between the file-level @words_test and
# @words_ref arrays, computed row by row with a single array @W that
# holds the previous row of the dynamic-programming table.
#
# When the file-level $beam is greater than zero, each row only visits
# reference positions from $best_j-$beam to $best_j+$beam (clipped to the
# valid index range); $best_j is moved to $j+1 whenever the value just
# computed is lower than the one to its left. With $beam equal to zero
# every reference position is visited.
#
# Returns the distance as a number. If one of the word lists is empty the
# distance is the length of the other list.
sub edit_distance {
    # Without these guards the loops below never assign $next and the
    # sub would return undef for an empty test or reference corpus.
    return scalar(@words_ref)  unless @words_test;
    return scalar(@words_test) unless @words_ref;

    # Row 0 of the table: distance from an empty test prefix.
    my @W = (0 .. scalar(@words_ref));
    my ($cur, $next);

    my ($lim_inf, $lim_sup);
    my $best_j = 0;

    for my $i (0 .. $#words_test) {
        # Column 0 of this row: i+1 test words against no reference words.
        $cur = $i + 1;

        if ($beam > 0) {
            $lim_inf = $best_j - $beam;
            $lim_inf = 0 if ($lim_inf < 0);

            $lim_sup = $best_j + $beam;
            $lim_sup = $#words_ref if ($lim_sup > $#words_ref);
        }
        else {
            $lim_inf = 0;
            $lim_sup = $#words_ref;
        }

        for my $j ($lim_inf .. $lim_sup) {
            # 1 when the words differ, false (numerically 0) when equal.
            my $cost = ($words_test[$i] ne $words_ref[$j]);
            $next = min([$W[$j + 1] + 1, $cur + 1, $cost + $W[$j]]);
            # $W[$j] of the previous row is no longer needed; store the
            # current row's value for use by the next row.
            $W[$j] = $cur;

            $best_j = $j + 1 if ($cur > $next);

            $cur = $next;
        }
        $W[ scalar(@words_ref) ] = $next;
    }

    return $next;
}
# Returns the numerically smallest element of the array referenced by the
# first argument (undef when that array is empty).
sub min {
    my ($values) = @_;
    my ($smallest, @others) = @{$values};

    for my $candidate (@others) {
        $smallest = $candidate if $candidate < $smallest;
    }

    return $smallest;
}
# Hook for normalizing the corpus text held in $_ before it is split into
# words. The only rule, which would put spaces around punctuation marks,
# is disabled, so $_ is currently left exactly as it is.
sub preprocess {
    # Disabled rule: insert spaces before and after punctuation marks.
    #s/([.,;:%¿?¡!()\[\]{}<>])/ $1 /g;
    return;
}
224 __END__
227 =head1 NAME
229 =head1 SYNOPSIS
231 apertium-eval-translator -test testfile -ref reffile [-beam <n>]
233 Options:
235 -test|-t Specify the file with the translation to evaluate
236 -ref|-r Specify the file with the reference translation
237 -beam|-b Perform a beam search by looking only at the <n> previous
238 and <n> following neighboring words (optional parameter
239 to make the evaluation much faster)
240 -help|-h Show this help message
241 -version|-v Show version information and exit
243 Note: The <n> value provided with -beam is language-pair dependent.
244 The closer the languages involved are, the smaller <n> can be
245 without affecting the evaluation results. This parameter only
246 affects the WER evaluation.
248 Note: Reference translation MUST have no unknown-word marks, even if
249 they are free rides.
251 This software calculates (at document level) the word error rate (WER)
252 and the position-independent word error rate (PER) between a
253 translation performed by the Apertium MT system and a reference
254 translation obtained by post-editing the system output.
256 It is assumed that unknown words are marked with a star (*), as
257 Apertium does; nevertheless, it can be easily adapted to evaluate
258 other MT systems that do not mark unknown words with a star.
260 (c) 2006 Felipe Sánchez Martínez
261 (c) 2006 Universitat d'Alacant
263 This software is licensed under the GNU GENERAL PUBLIC LICENSE version
264 2, or at your option any later version. See
265 http://www.gnu.org/copyleft/gpl.html for a complete version of the
266 license.