més
[apertium.git] / apertium-tagger-training-tools-unicode / src / gen_dic_file.sh
blob222b9f13a5f9d8a28d1b4296ec5657edeb4083b0
# Require all three positional arguments (dictionary source, compiled
# dictionary, tagger definition). An empty string counts as missing.
# On failure, print the usage line to stderr and exit with status 1.
if [[ -z $1 || -z $2 || -z $3 ]]; then
  # ${0##*/} strips the directory part of the script path without any
  # word-splitting, so the usage line stays correct even when the path
  # contains spaces or glob characters.
  echo "USAGE: ${0##*/} dictionary.dix dictionary.bin tagger.tsx" 1>&2
  exit 1
fi
# Give the three positional arguments the names used by the rest of the
# script.
DIXFILE="$1"
BINFILE="$2"
TSXFILE="$3"

# Progress notice; written to stderr, not stdout.
printf '%s\n' \
  "Generating dic file" \
  "This may take some time. Please, take a cup of coffee and come back later." \
  1>&2
# Run the Apertium validators on the dictionary source and on the tagger
# definition. Their exit status is intentionally not checked here: any
# diagnostics come from the tools themselves and the script carries on
# regardless.
# The arguments are quoted so that a path containing spaces or glob
# characters reaches each validator as a single word.
apertium-validate-dictionary "$DIXFILE"
apertium-validate-tagger "$TSXFILE"
# Print the extra entries that are appended after the expanded dictionary,
# one per line: punctuation marks, brackets and the token "42".
# NOTE(review): "42" is presumably there so that a numeral gets analysed
# as well - confirm.
_gen_dic_extra_entries() {
  printf '%s\n' "." "?" ";" ":" "!" "42" "," "(" "[" ")" "]" "¿" "¡"
}

# Use a private scratch file instead of the fixed name /tmp/dic.expanded:
# a predictable path in a shared directory can be pre-created (or
# symlinked) by another user, and two concurrent runs would overwrite each
# other's data.
expanded_file=$(mktemp "${TMPDIR:-/tmp}/dic.expanded.XXXXXX") || {
  echo "ERROR: cannot create temporary file" 1>&2
  exit 1
}
# Remove the scratch file when the script exits, including early exits.
trap 'rm -f -- "$expanded_file"' EXIT

# Build the list of forms to analyse:
#  - expand the dictionary with lt-expand,
#  - drop lines containing "__REGEXP__" or ":<:",
#  - keep the text before the first ":>:" or ":" and append a "." to it,
#  - finally add the extra punctuation/number entries.
{
  lt-expand "$DIXFILE" \
    | grep -v -e "__REGEXP__" -e ":<:" \
    | awk 'BEGIN{FS=":>:|:"}{print $1 ".";}'
  _gen_dic_extra_entries
} > "$expanded_file"

# Deformat the list, analyse it with the compiled dictionary and filter the
# ambiguity classes with the tagger definition; the result goes to stdout.
# This pipeline is the last command, so its exit status becomes the exit
# status of the script (previously it was hidden by the trailing rm).
apertium-destxt < "$expanded_file" \
  | lt-proc -a "$BINFILE" \
  | apertium-filter-ambiguity "$TSXFILE"