# And the tajik side
# [apertium.git] / apertium-es-ca / es-ca-unsupervised.make
# blob 4f37d3a9097de9bed9d85dd17c057f019b325036
# Iteration count handed to `apertium-tagger -t` in the .prob rule.
TAGGER_UNSUPERVISED_ITERATIONS = 8

# The two language codes of the pair; LANG1 names the tagger files.
LANG1 = es
LANG2 = ca

# Stem of the dictionary (.dix) and tagger-definition (.tsx) file names.
BASENAME = apertium-es-ca

# Stem of the .prob output and of the .automorf.bin input.
PREFIX = $(LANG1)-$(LANG2)

# Directory holding the .dic, .crp and .crp.txt tagger files.
TAGGER = $(LANG1)-tagger-data
# First rule in the file, hence the default goal: build the .prob file.
# Declared phony so that a stray file named `all` in the working
# directory cannot make this target look up to date.
.PHONY: all
all: $(PREFIX).prob
# Produce the .prob file: validate the .tsx tagger definition, then run
# apertium-tagger with -t for TAGGER_UNSUPERVISED_ITERATIONS iterations,
# giving it the .dic, the .crp and the .tsx, and naming this target as
# the last argument.
#   $<  is the .tsx file (first prerequisite)
#   $@  is the .prob file being built
$(PREFIX).prob: $(BASENAME).$(LANG1).tsx $(TAGGER)/$(LANG1).dic $(TAGGER)/$(LANG1).crp
	apertium-validate-tagger $<
	apertium-tagger -t $(TAGGER_UNSUPERVISED_ITERATIONS) \
	    $(TAGGER)/$(LANG1).dic $(TAGGER)/$(LANG1).crp $< $@
# Build the tagger .dic file from the monolingual dictionary.
#
# Steps performed by the recipe:
#   1. Validate the .dix dictionary and the .tsx tagger definition.
#   2. Expand the .dix with lt-expand, drop lines containing "__REGEXP__"
#      or ":<:", keep the first field of each line (split on ":>:" or
#      ":") followed by ".", and pipe that through apertium-destxt into
#      the scratch file $(LANG1).dic.expanded.
#   3. Append a fixed list of punctuation marks and the number 42 to the
#      scratch file, one per line.
#   4. Analyse the scratch file with `lt-proc -a` using the automorf
#      binary and filter the result through apertium-filter-ambiguity
#      (driven by the .tsx) into the target.
#   5. Remove the scratch file.
#
# The .tsx file is listed as a prerequisite because steps 1 and 4 read
# it; without it, editing the .tsx would leave a stale .dic behind.
$(TAGGER)/$(LANG1).dic: $(BASENAME).$(LANG1).dix $(PREFIX).automorf.bin $(BASENAME).$(LANG1).tsx
	@echo "Generating $@";
	@echo "This may take some time. Please, take a cup of coffee and come back later.";
	apertium-validate-dictionary $(BASENAME).$(LANG1).dix
	apertium-validate-tagger $(BASENAME).$(LANG1).tsx
	lt-expand $(BASENAME).$(LANG1).dix | grep -v "__REGEXP__" | grep -v ":<:" |\
	awk 'BEGIN{FS=":>:|:"}{print $$1 ".";}' | apertium-destxt >$(LANG1).dic.expanded
	@# One printf instead of a run of echo calls: %s prints each argument
	@# verbatim, so the backslashes in '\[' and '\]' do not depend on how
	@# the shell's echo treats escape sequences.
	@printf '%s\n' '.' '?' ';' ':' '!' '42' ',' '(' '\[' ')' '\]' '¿' '¡' \
	    >>$(LANG1).dic.expanded
	lt-proc -a $(PREFIX).automorf.bin <$(LANG1).dic.expanded | \
	apertium-filter-ambiguity $(BASENAME).$(LANG1).tsx > $@
	rm -f $(LANG1).dic.expanded
# Build the .crp file: feed the .crp.txt file to apertium-destxt and pipe
# its output through lt-proc with the automorf binary.
#   $<      is the .automorf.bin file (first prerequisite)
#   $@      is the .crp file being built
#   $@.txt  is therefore the .crp.txt prerequisite
$(TAGGER)/$(LANG1).crp: $(PREFIX).automorf.bin $(TAGGER)/$(LANG1).crp.txt
	apertium-destxt < $@.txt | lt-proc $< > $@
# Remove the .prob file. Only that file is deleted; the .dic and .crp
# files in the tagger data directory are left in place.
# Declared phony so that a stray file named `clean` cannot disable it.
.PHONY: clean
clean:
	rm -f $(PREFIX).prob