apertium-eval-translator/eval-translator.pl

   1
   2 # (c) 2006 Felipe Sánchez Martínez
   3 # (c) 2006 Universitat d'Alacant
   4 #
   5 # This software calculates the word error rate (WER) and the
   6 # position-independent word error rate (PER) between the translation
   7 # performed by the apertium MT system and a reference translation
   8 # obtained by post-editing the system output.
   9 #
  10 # The edit_distance procedure used in this script is based on
  11 # the Levenshtein distance implementation by Jorge Mas Trullenque
  12 # that can be found in http://www.merriampark.com/ldperl2.htm
  13 #
  14 # This software is licensed under the GPL license version 2, or at
  15 # your option any later version
  16 #
  17
  18 use strict;
  19 use warnings;
  20
  21 # Getting command line arguments:
  22 use Getopt::Long;
  23 # Documentation:
  24 use Pod::Usage;
  25 # I/O Handler
  26 use IO::Handle;
  27
  # Main program: parse the command line, read the test and the reference
  # translations, and print WER/PER statistics twice -- first with the
  # unknown-word marks (stars) removed from the test words, then with the
  # marks left in place.
  #
  # @words_test, @words_ref and $beam are file-scoped on purpose: they are
  # read directly by edit_distance() and position_independent_errors().
  my ($test, $ref, $help, $beam, $version);

  my ($nunknown, $ntest, $nref);

  my ($test_corpus, $ref_corpus);

  # Command line arguments
  GetOptions( 'test|t=s'           => \$test,
              'ref|r=s'            => \$ref,
              'beam|b=n'           => \$beam,
              'help|h'             => \$help,
              'version|v'          => \$version,
            ) || pod2usage(2);

  if ($version) {
    print "apertium-eval-translator 1.0.0\n";
    exit 0;
  }

  pod2usage(2) if $help;
  pod2usage(2) unless ($test);
  pod2usage(2) unless ($ref);

  # A missing or negative beam width disables the beam search.
  $beam = 0 unless ($beam);
  $beam = 0 if ($beam < 0);

  # Three-argument open with lexical handles, so that the file names are never
  # interpreted as an open mode.
  open(my $test_fh, '<', $test) or die "Error: Cannot open test file \'$test\': $!\n";
  open(my $ref_fh, '<', $ref) or die "Error: Cannot open reference file \'$ref\': $!\n";

  # Formats $part/$whole as a percentage with two decimals.  A zero
  # denominator (empty test file, or no unknown words at all) yields "0.00"
  # instead of aborting with "Illegal division by zero".
  my $percent = sub {
    my ($part, $whole) = @_;
    return sprintf("%.2f", $whole ? ($part / $whole) * 100 : 0);
  };

  # Read the whole test file in one step; $/ is only changed inside the do
  # block.  preprocess() works on $_.
  $_ = do { local $/; <$test_fh> };
  $_ = '' unless defined $_;
  close($test_fh);
  preprocess();
  $test_corpus = $_;                       # copy that still carries the stars
  # s///g returns an empty string when nothing matched; force a numeric 0.
  $nunknown = (s/[*](\w+)/$1/g) || 0;
  # split ' ' skips leading whitespace, so a file starting with a blank or a
  # newline does not produce an empty first "word".
  my @words_test = split ' ';
  $ntest = @words_test;

  $_ = do { local $/; <$ref_fh> };
  $_ = '' unless defined $_;
  close($ref_fh);
  preprocess();
  $ref_corpus = $_;
  my @words_ref = split ' ';
  $nref = @words_ref;

  print "Test file: '$test'\n";
  print "Reference file '$ref'\n\n";

  print "Statistics about input files\n";
  print "-------------------------------------------------------\n";
  print "Number of words in reference: $nref\n";
  print "Number of words in test: $ntest\n";
  print "Number of unknown words (marked with a star) in test: $nunknown\n";
  print "Percentage of unknown words: ", $percent->($nunknown, $ntest), " %\n";
  print "\n";

  # First pass: @words_test holds the words with the stars stripped.
  my $distance_nounk = edit_distance();
  print "Results when removing unknown-word marks (stars)\n";
  print "-------------------------------------------------------\n";
  print "Edit distance: $distance_nounk\n";
  print "Word error rate (WER): ", $percent->($distance_nounk, $ntest), " %\n";
  my $per_nounk = position_independent_errors();
  print "Number of position-independent word errors: ",  $per_nounk, "\n";
  print "Position-independent word error rate (PER): ", $percent->($per_nounk, $ntest), " %\n";

  print "\n";

  # Second pass: re-tokenize the saved corpora, so the test words keep their
  # stars.
  @words_test = split ' ', $test_corpus;
  @words_ref = split ' ', $ref_corpus;

  my $distance = edit_distance();
  print "Results when unknown-word marks (stars) are not removed\n";
  print "-------------------------------------------------------\n";
  print "Edit distance: $distance\n";
  print "Word Error Rate (WER): ", $percent->($distance, $ntest), " %\n";
  my $per = position_independent_errors();
  print "Number of position-independent word errors: ",  $per, "\n";
  print "Position-independent word error rate (PER): ", $percent->($per, $ntest), " %\n";

  print "\n";

  # The free-ride count is the difference between the two edit distances.
  print "Statistics about the translation of unknown words\n";
  print "-------------------------------------------------------\n";
  print "Number of unknown words which were free rides: ", $distance-$distance_nounk, "\n";
  print "Percentage of unknown words that were free rides: ",
         $percent->($distance-$distance_nounk, $nunknown), " %\n";
 118
 119
 # Counts the position-independent word errors between the file-scoped
 # @words_test and @words_ref.
 #
 # Each list is reduced to a bag of words (word => number of occurrences).
 # The absolute differences of the per-word counts are added up, the
 # difference in length between the two lists is added to that, and the
 # total is halved.  Takes no arguments; returns the resulting number.
 sub position_independent_errors {
   my (%occurrences_test, %occurrences_ref);

   $occurrences_test{$_}++ for @words_test;
   $occurrences_ref{$_}++  for @words_ref;

   # Visit every distinct word of either list exactly once.
   my %visited;
   my @vocabulary = grep { !$visited{$_}++ }
                    (keys %occurrences_test, keys %occurrences_ref);

   my $count_mismatch = 0;
   for my $word (@vocabulary) {
     # A word missing from one side counts as zero occurrences there.
     my $in_test = exists $occurrences_test{$word} ? $occurrences_test{$word} : 0;
     my $in_ref  = exists $occurrences_ref{$word}  ? $occurrences_ref{$word}  : 0;
     $count_mismatch += abs($in_test - $in_ref);
   }

   my $length_gap = abs(scalar(@words_test) - scalar(@words_ref));

   return ($length_gap + $count_mismatch) / 2;
 }
 173
 # Word-level edit (Levenshtein) distance between the file-scoped
 # @words_test and @words_ref, computed with a single row (@W) that is
 # updated in place.
 #
 # Takes no arguments.  Reads the file-scoped $beam: when it is greater than
 # zero, each row only visits the reference positions within $beam of
 # $best_j, so the value returned may differ from the exact distance.
 #
 # Returns the distance as a number.  When one of the two word lists is empty
 # the distance is the length of the other one.
 sub edit_distance {
   # Without these guards the loops below never assign $next and the
   # function would return undef.
   return scalar(@words_ref)  unless @words_test;
   return scalar(@words_test) unless @words_ref;

   # Row for "zero test words consumed": j insertions reach column j.
   my @W = (0 .. scalar(@words_ref));
   my ($cur, $next);

   my ($lim_inf, $lim_sup);
   my $best_j = 0;

   for my $i (0 .. $#words_test) {
     $cur = $i + 1;

     if ($beam > 0) {
       # Clamp the window [$best_j - $beam, $best_j + $beam] to valid
       # reference indexes.
       $lim_inf = $best_j - $beam;
       $lim_inf = 0 if ($lim_inf < 0);

       $lim_sup = $best_j + $beam;
       $lim_sup = $#words_ref if ($lim_sup > $#words_ref);
     } else {
       $lim_inf = 0;
       $lim_sup = $#words_ref;
     }

     for my $j ($lim_inf .. $lim_sup) {
       my $cost = ($words_test[$i] ne $words_ref[$j]) ? 1 : 0;

       # Smallest of the three candidate costs: $W[$j+1] + 1, $cur + 1 and
       # $W[$j] + $cost.  Computed inline so that no temporary list is
       # built for every cell.
       $next = $W[$j + 1] + 1;
       $next = $cur + 1        if ($cur + 1 < $next);
       $next = $cost + $W[$j]  if ($cost + $W[$j] < $next);

       # $W[$j] has been read for this row and can now take the new value.
       $W[$j] = $cur;

       # Remember the column after the last position where the cost along
       # this row went down; the next row's beam window is centred on it.
       $best_j = $j + 1 if ($cur > $next);

       $cur = $next;
     }
     $W[scalar(@words_ref)] = $next;
   }
   return $next;
 }
 207
 # Returns the numerically smallest element of the array referenced by the
 # first argument (undef when that array is empty).
 sub min {
   my ($values) = @_;
   my ($smallest, @others) = @{$values};

   for my $candidate (@others) {
     $smallest = $candidate if ($candidate < $smallest);
   }

   return $smallest;
 }
 217
 # Normalization hook called on the text held in $_ before it is split into
 # words.  It currently changes nothing: the substitution that would insert
 # spaces before and after punctuation marks is commented out.
 sub preprocess {
   #s/([.,;:%¿?¡!()\[\]{}<>])/ $1 /g;
   return;
 }
 222
 223
 224 __END__
 225
 226
 227 =head1 NAME
 228
 229 =head1 SYNOPSIS
 230
 231 apertium-eval-translator -test testfile -ref reffile [-beam <n>]
 232
 233 Options:
 234
 235   -test|-t     Specify the file with the translation to evaluate
 236   -ref|-r      Specify the file with the reference translation
 237   -beam|-b     Perform a beam search by looking only at the <n> previous
 238                and <n> following neighboring words (optional parameter
 239                to make the evaluation much faster)
 240   -help|-h     Show this help message
 241   -version|-v  Show version information and exit
 242
 243 Note: The <n> value provided with -beam is language-pair dependent.
 244       The closer the languages involved are, the smaller <n> can be
 245       without affecting the evaluation results.  This parameter only
 246       affects the WER evaluation.
 247
 248 Note: Reference translation MUST have no unknown-word marks, even if
 249       they are free rides.
 250
 251 This software calculates (at document level) the word error rate (WER)
 252 and the position-independent word error rate (PER) between a
 253 translation performed by the Apertium MT system and a reference
 254 translation obtained by post-editing the system output.
 255
 256 It is assumed that unknown words are marked with a star (*), as
 257 Apertium does; nevertheless, it can be easily adapted to evaluate
 258 other MT systems that do not mark unknown words with a star.
 259
 260 (c) 2006 Felipe Sánchez Martínez
 261 (c) 2006 Universitat d'Alacant
 262
 263 This software is licensed under the GNU GENERAL PUBLIC LICENSE version
 264 2, or at your option any later version. See
 265 http://www.gnu.org/copyleft/gpl.html for a complete version of the
 266 license.