1 # -*- coding: utf-8 -*-
# This Makefile creates German hyphenation patterns in subdirectories
# $(TRAD) and $(REFO) for traditional and new orthography, respectively.
# Hyphenation patterns for traditional Swiss German are generated in
# $(SWISS).
8 # The input data is in $(SRCDIR); the possible targets are `pattern-trad',
9 # `pattern-refo', and `pattern-swiss'. If no target (or target `all') is
10 # given, all patterns for all three targets are built.
12 # SRCDIR (and the other variables) can be easily modified as parameters
13 # while calling `make', e.g.
15 # make pattern-trad SRCDIR=~/git/wortliste
17 # If you add one of the (phony) targets `major', `fugen', or `suffix',
18 # patterns that only use major hyphenation points (`Haupttrennstellen')
19 # are created. Example:
21 # make major pattern-refo
# The directory names used are the same as above but with `-major' (etc.)
# appended.
26 # To control the used weights in the major hyphenation patterns, add
27 # variable `W=N', where `N' gives the quality: value 1 specifies the best
28 # hyphenation points only, value 2 both the best and second-best points,
29 # etc. The default is value 0, using all major hyphenation points.
# Dieses Makefile erzeugt deutsche Trennmuster in den
# Unterverzeichnissen $(TRAD) und $(REFO) für die traditionelle
# bzw. reformierte Rechtschreibung.  Trennmuster für traditionelles
# deutschschweizerisches Deutsch werden im Verzeichnis $(SWISS) erzeugt.
38 # Die Eingabedaten werden im Verzeichnis $(SRCDIR) erwartet; die möglichen
39 # Make-Ziele sind `pattern-trad', `pattern-refo' und `pattern-swiss'. Wenn
40 # kein Ziel angegeben ist (oder man das Ziel `all' verwendet), werden alle
41 # drei Trennmuster erzeugt.
43 # SRCDIR (und die anderen Variablen) kann man leicht beim Aufruf von
44 # `make' als Parameter modifizieren, z.B.
46 # make pattern-trad SRCDIR=~/git/wortliste
48 # Wird eines der zusätzlichen (künstlichen) Ziele `major', `fugen' oder
49 # `suffix' angegeben, werden Haupttrennstellmuster erzeugt.
53 # make major pattern-refo
55 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings
56 # mit einem angehängten `-major', `-fugen' bzw. `-suffix'.
58 # Diese Spezialmuster spiegeln die Auszeichnung in der Liste direkt wider.
59 # Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen, sondern
60 # sind zum Testen der Konsistenz der Auszeichnung sowie zum "kategorisierten"
61 # Markieren der Trennstellen neuer Wörter gedacht.
# Bei `major' kann die Menge der verwendeten Haupttrennstellen mittels der
# Variable `W=N' (Wichtungs-Schwellwert) kontrolliert werden, wobei `N' die
# Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen,
# Wert 2 die besten und zweitbesten Haupttrennstellen usw.  Der Standardwert
# für `W' ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden
# sollen.
# Die Ziele `de-Latf' und `de-x-long-s' erzeugen (experimentelle) Wortlisten
# und (Quasi-)Trennmuster für die Wandlung von Wörtern in traditioneller oder
# reformierter Standardorthographie in die Variante mit Unterscheidung von
# langem und rundem S (Binnen-S vs. Schluß-S), wie sie im Satz mit gebrochenen
# Schriften benötigt wird.  Ein Beispiel für die Anwendung dieser Muster ist
# das Skript `skripte/python/patuse/long_s_conversion.py'.
# Locations inside the source tree; every path hangs off $(SRCDIR).
# Recursive `=' is intentional: SRCDIR is reassigned further down and
# these paths must follow that change.
DATADIR   = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
LANGSDIR  = $(SCRIPTDIR)/python/lang_s
# Goal-dependent setup: depending on whether `major', `fugen', or `suffix'
# occurs in $(MAKECMDGOALS), SEDMAJOR becomes a sed command that rewrites
# the hyphenation markers (`-', `<', `>', `=') as described per branch.
# The nested check on $(words $(MAKECMDGOALS)) handles the case where the
# special goal is the only goal given.
# NOTE(review): parts of every branch (further sed expressions, the bodies
# of the single-goal checks, the closing `endif') are not visible here --
# confirm against the complete file before changing anything.
85 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
87 # A single `-' gets removed; all other combinations of `-', `<', `>',
88 # and `=' are converted to a hyphen.
89 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
92 -e
's/[=<>][=<>]*/-/g' \
96 ifeq ($(words $(MAKECMDGOALS
)),1)
99 # This is to suppress the `nothing to be done' warning.
103 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
105 # All combinations of `-', `<', `>', `<=', `=>' get removed,
106 # runs of `=' are converted to a hyphen.
107 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
111 -e
's/[<>][<>]*//g' \
115 ifeq ($(words $(MAKECMDGOALS
)),1)
118 # This is to suppress the `nothing to be done' warning.
122 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
124 # All combinations of `-', `<', `=' get removed,
125 # runs of `>' are converted to a hyphen.
126 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
128 -e
's/[<=][<=]*//g' \
132 ifeq ($(words $(MAKECMDGOALS
)),1)
135 # This is to suppress the `nothing to be done' warning.
# Output directory (and file stem) for each orthography.  $(MAJOR)
# appends the `-major' (etc.) suffix described in the header comment.
TRAD  = dehypht-x$(MAJOR)
REFO  = dehyphn-x$(MAJOR)
SWISS = dehyphts-x$(MAJOR)
# Environment assignments that $(SORT) puts in front of `sort' and `uniq';
# LC_COLLATE selects the de_DE.UTF-8 collation rules.
# NOTE(review): the definition continues on a following line that is not
# visible here -- confirm the remaining settings.
151 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date (YYYY-MM-DD); it is part of the generated file names and is
# substituted for @DATE@ in the templates.
# Simply expanded (`:=') so that `date' runs exactly once per make
# invocation: with a recursive `=' every reference forks a new `date',
# and a build crossing midnight would see target names change under it.
# `make DATE=...' on the command line still overrides this value.
DATE := $(shell date '+%Y-%m-%d')
# Sort in dictionary order (`sort -d'), then drop adjacent duplicates
# ignoring case (`uniq -i'); both run with the $(LC_ENVVARS) settings.
SORT = $(LC_ENVVARS) sort -d | $(LC_ENVVARS) uniq -i
# Values substituted for @LEFTHYPHENMIN@ / @RIGHTHYPHENMIN@ in the pattern
# headers.  They are read from the first line of $(DATADIR)/german.tr:
# characters 1-2 give the left value, characters 3-4 the right one;
# $(strip) removes padding blanks.
LEFTHYPHENMIN  = $(strip $(shell $(SED) 's/^\(..\).*/\1/;q' \
                   < $(DATADIR)/german.tr))
RIGHTHYPHENMIN = $(strip $(shell $(SED) 's/^..\(..\).*/\1/;q' \
                   < $(DATADIR)/german.tr))
# Commit hash of the source tree, substituted for @GIT_VERSION@.
# The value is a shell command substitution (back-ticks), so the lookup
# is done by the shell of each recipe that references $(GIT_VERSION).
# `&&' instead of `;': if changing into $(SRCDIR) fails, `git log' must
# not run in whatever directory the recipe happens to be in and report
# the hash of an unrelated repository.
GIT_VERSION := `$(CHDIR) $(SRCDIR) && $(GIT) log --format=%H -1 HEAD --`
# Final products per orthography: the dated pattern file (.pat) and its
# TeX wrapper (.tex), both inside the orthography's own directory.
TRADFILES  = $(addprefix $(TRAD)/$(TRAD)-$(DATE).,pat tex)
REFOFILES  = $(addprefix $(REFO)/$(REFO)-$(DATE).,pat tex)
SWISSFILES = $(addprefix $(SWISS)/$(SWISS)-$(DATE).,pat tex)
# Make SRCDIR absolute so that $(DATADIR) and $(SCRIPTDIR) remain valid
# in recipes that change into an output directory.  `override' is needed
# because SRCDIR may have been given on the command line.
# `&&' instead of `;': when SRCDIR does not exist the result is empty
# (and the build fails visibly on the first missing file) rather than
# silently becoming the current working directory.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# Default goal: build the patterns for all three orthographies.
all: pattern-trad pattern-refo pattern-swiss

# None of these goals names a real file.  `all' is listed as well, so a
# stray file called `all' cannot make the default goal look up to date.
# `major', `fugen', and `suffix' are pure mode switches.
.PHONY: all pattern-trad pattern-refo pattern-swiss major fugen suffix

pattern-trad:  $(TRADFILES)
pattern-refo:  $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# intermediate targets

# Short aliases for the hyphenated word lists, plus the `pre-*'
# preparation goals; none of them names a real file.
.PHONY: words-trad words-refo pre-trad pre-refo pre-swiss

words-trad: $(TRAD)/words.hyphenated.trad
words-refo: $(REFO)/words.hyphenated.refo

# Everything generated for one orthography depends on its `pre-*' goal.
# NOTE(review): a phony goal used as a normal prerequisite makes these
# files count as out of date on every run; if `pre-*' only prepares the
# output directory, an order-only prerequisite (`| pre-trad') may be what
# is wanted -- confirm against the `pre-*' recipes.
$(TRADFILES)  $(TRAD)/words.hyphenated.trad:   pre-trad
$(REFOFILES)  $(REFO)/words.hyphenated.refo:   pre-refo
$(SWISSFILES) $(SWISS)/words.hyphenated.swiss: pre-swiss
211 # GNU make supports creation of multiple targets by a single
212 # invocation of a recipe only for pattern rules, thus we have
213 # to use a `sentinel file' (using `echo' for the time stamp).
# pattern.8 and pattern.rules are both made available through the single
# target make-full-pattern-trad, so that they share one prerequisite
# instead of each carrying its own recipe.
216 $(TRAD
)/pattern
.8 $(TRAD
)/pattern.rules
: $(TRAD
)/make-full-pattern-trad
# make-full-pattern-trad is rebuilt from the hyphenated word list: the
# script make-full-pattern.sh receives the list's bare file name ($(<F))
# and $(DATADIR)/german.tr.
# NOTE(review): $(<F) carries no directory part, so the script presumably
# has to run inside $(TRAD), and the target file itself must be written
# somewhere; neither step is visible here -- confirm.
218 $(TRAD
)/make-full-pattern-trad
: $(TRAD
)/words.hyphenated.trad
220 $(SH
) $(SCRIPTDIR
)/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for traditional orthography, in this
# order: header $(TRAD).1 (with the @...@ placeholders filled in),
# pattern.rules, $(TRAD).2, pattern.8, $(TRAD).3.
# The steps are chained with `&&' rather than `;' so that a failing step
# fails the rule instead of hiding behind the exit status of the last
# `cat'.
$(TRAD)/$(TRAD)-$(DATE).pat: $(TRAD)/pattern.8 $(TRAD)/pattern.rules
	$(CAT) $(DATADIR)/$(TRAD).1 \
	| $(SED) -e "s/@DATE@/$(DATE)/" \
	         -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	         -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	         -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@ \
	&& $(CAT) $(TRAD)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(TRAD).2 >> $@ \
	&& $(CAT) $(TRAD)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(TRAD).3 >> $@
# pattern.8 and pattern.rules are both made available through the single
# target make-full-pattern-refo, so that they share one prerequisite
# instead of each carrying its own recipe.
235 $(REFO
)/pattern
.8 $(REFO
)/pattern.rules
: $(REFO
)/make-full-pattern-refo
# make-full-pattern-refo is rebuilt from the hyphenated word list: the
# script make-full-pattern.sh receives the list's bare file name ($(<F))
# and $(DATADIR)/german.tr.
# NOTE(review): $(<F) carries no directory part, so the script presumably
# has to run inside $(REFO), and the target file itself must be written
# somewhere; neither step is visible here -- confirm.
237 $(REFO
)/make-full-pattern-refo
: $(REFO
)/words.hyphenated.refo
239 $(SH
) $(SCRIPTDIR
)/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for reformed orthography, in this
# order: header $(REFO).1 (with the @...@ placeholders filled in),
# pattern.rules, $(REFO).2, pattern.8, $(REFO).3.
# The steps are chained with `&&' rather than `;' so that a failing step
# fails the rule instead of hiding behind the exit status of the last
# `cat'.
$(REFO)/$(REFO)-$(DATE).pat: $(REFO)/pattern.8 $(REFO)/pattern.rules
	$(CAT) $(DATADIR)/$(REFO).1 \
	| $(SED) -e "s/@DATE@/$(DATE)/" \
	         -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	         -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	         -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@ \
	&& $(CAT) $(REFO)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(REFO).2 >> $@ \
	&& $(CAT) $(REFO)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(REFO).3 >> $@
# pattern.8 and pattern.rules are both made available through the single
# target make-full-pattern-swiss, so that they share one prerequisite
# instead of each carrying its own recipe.
254 $(SWISS
)/pattern
.8 $(SWISS
)/pattern.rules
: $(SWISS
)/make-full-pattern-swiss
# make-full-pattern-swiss is rebuilt from the hyphenated word list: the
# script make-full-pattern.sh receives the list's bare file name ($(<F))
# and $(DATADIR)/german.tr.
# NOTE(review): $(<F) carries no directory part, so the script presumably
# has to run inside $(SWISS), and the target file itself must be written
# somewhere; neither step is visible here -- confirm.
256 $(SWISS
)/make-full-pattern-swiss
: $(SWISS
)/words.hyphenated.swiss
258 $(SH
) $(SCRIPTDIR
)/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for traditional Swiss orthography, in
# this order: header $(SWISS).1 (with the @...@ placeholders filled in),
# pattern.rules, $(SWISS).2, pattern.8, $(SWISS).3.
# The steps are chained with `&&' rather than `;' so that a failing step
# fails the rule instead of hiding behind the exit status of the last
# `cat'.
$(SWISS)/$(SWISS)-$(DATE).pat: $(SWISS)/pattern.8 $(SWISS)/pattern.rules
	$(CAT) $(DATADIR)/$(SWISS).1 \
	| $(SED) -e "s/@DATE@/$(DATE)/" \
	         -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	         -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	         -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@ \
	&& $(CAT) $(SWISS)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(SWISS).2 >> $@ \
	&& $(CAT) $(SWISS)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(SWISS).3 >> $@
# Hyphenated word lists, one per orthography.  Each depends on
# $(SRCDIR)/$(WORDLIST) and is produced by piping data through
# extract-tex.pl with `-1 $(PERLMAJOR)'; the rules differ only in the
# selector option: `-t' (trad), none (refo), `-s' (swiss).
# NOTE(review): the recipes are truncated here -- the command feeding the
# pipe and whatever follows the trailing backslash are not visible.
# Confirm against the complete file before editing.
273 $(TRAD
)/words.hyphenated.trad
: $(SRCDIR
)/$(WORDLIST
)
275 |
$(PERL
) $(SCRIPTDIR
)/extract-tex.pl
-t
-1 $(PERLMAJOR
) \
279 $(REFO
)/words.hyphenated.refo
: $(SRCDIR
)/$(WORDLIST
)
281 |
$(PERL
) $(SCRIPTDIR
)/extract-tex.pl
-1 $(PERLMAJOR
) \
285 $(SWISS
)/words.hyphenated.swiss
: $(SRCDIR
)/$(WORDLIST
)
287 |
$(PERL
) $(SCRIPTDIR
)/extract-tex.pl
-s
-1 $(PERLMAJOR
) \
# TeX wrapper files, one per orthography: generated from the template
# $(DATADIR)/<name>.tex.in by replacing @DATE@ with $(DATE).
# NOTE(review): the command feeding the pipe into sed is not visible
# here -- confirm against the complete file.
292 $(TRAD
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
294 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
296 $(REFO
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
298 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
300 $(SWISS
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
302 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
304 # Listen und Patterns de-Latf (deutsch, Latin script, fraktur;
305 # Orthographie für Satz mit gebrochenen Schriften und rundem und langem S)
307 # Wortlisten mit Langem-S: gemischt, de-1901 (alt), de-1996 (reform)
# Word lists with long s, produced by s2long-s.py from `wortliste'.
# `$<' is the first prerequisite (wortliste), `$@' the list being built.
# Each list is rebuilt when the word list or the script changes.

# mixed: traditional and reformed orthography
$(LATF)/words-de-Latf.txt: wortliste $(LANGSDIR)/s2long-s.py
	$(PYTHON) $(LANGSDIR)/s2long-s.py --drop-homonyms \
	  -l "de-1901,de-1996" -i $< -o $@

# traditional orthography only
$(LATF)/words-de-1901-Latf.txt: wortliste $(LANGSDIR)/s2long-s.py
	$(PYTHON) $(LANGSDIR)/s2long-s.py --drop-homonyms -l de-1901 \
	  -i $< -o $@

# reformed orthography only
$(LATF)/words-de-1996-Latf.txt: wortliste $(LANGSDIR)/s2long-s.py
	$(PYTHON) $(LANGSDIR)/s2long-s.py --drop-homonyms -l de-1996 \
	  -i $< -o $@
323 # de-Latf...: Variante mit Quasi-Trennstellen nach rund-s (aus-sagen)
# Goals for the de-Latf quasi-hyphenation patterns (mixed, 1901, 1996).
# For every variant:
#   words-*.txt --de_Latf_quasihyph.py--> words-*.hyphenated
#   words-*.hyphenated --make-full-pattern.sh--> pattern.8 --> *.pat
#
# Notes on the *.pat recipes:
#  - make-full-pattern.sh is given the bare file name $(<F), so it is run
#    from inside $(LATF), as the corresponding long-s rules do.
#  - The result is written with `>' (not `>>'): appending to an existing,
#    outdated *.pat would duplicate the patterns on every rebuild.
.PHONY: de-Latf de-1901-Latf de-1996-Latf

de-Latf: $(LATF)/de-Latf.pat

$(LATF)/words-de-Latf.hyphenated: $(LATF)/words-de-Latf.txt
	$(PYTHON) $(LANGSDIR)/de_Latf_quasihyph.py < $< > $@

$(LATF)/de-Latf.pat: $(LATF)/words-de-Latf.hyphenated
	$(CHDIR) $(LATF) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LATF)/pattern.8 > $@

de-1901-Latf: $(LATF)/de-1901-Latf.pat

$(LATF)/words-de-1901-Latf.hyphenated: $(LATF)/words-de-1901-Latf.txt
	$(PYTHON) $(LANGSDIR)/de_Latf_quasihyph.py < $< > $@

$(LATF)/de-1901-Latf.pat: $(LATF)/words-de-1901-Latf.hyphenated
	$(CHDIR) $(LATF) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LATF)/pattern.8 > $@

de-1996-Latf: $(LATF)/de-1996-Latf.pat

$(LATF)/words-de-1996-Latf.hyphenated: $(LATF)/words-de-1996-Latf.txt
	$(PYTHON) $(LANGSDIR)/de_Latf_quasihyph.py < $< > $@

$(LATF)/de-1996-Latf.pat: $(LATF)/words-de-1996-Latf.hyphenated
	$(CHDIR) $(LATF) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LATF)/pattern.8 > $@
357 # long-s: Variante mit Quasi-Trennstellen nach Lang-S (auss-agen)
# Goals for the long-s quasi-hyphenation patterns (mixed, 1901, 1996).
# For every variant:
#   $(LATF)/words-*-Latf.txt --long_s_quasihyph.py--> words-*.hyphenated
#   words-*.hyphenated --make-full-pattern.sh--> pattern.8 --> *.pat
#
# Notes on the *.pat recipes:
#  - make-full-pattern.sh runs inside $(LONG_S) and gets the bare file
#    name $(<F).  `&&' (not `;') keeps the script from running in the
#    wrong directory when the directory change fails.
#  - The result is written with `>' (not `>>'): appending to an existing,
#    outdated *.pat would duplicate the patterns on every rebuild.
.PHONY: de-x-long-s de-1901-x-long-s de-1996-x-long-s

de-x-long-s: $(LONG_S)/de-x-long-s.pat

$(LONG_S)/words-de-x-long-s.hyphenated: $(LATF)/words-de-Latf.txt
	$(PYTHON) $(LANGSDIR)/long_s_quasihyph.py < $< > $@

$(LONG_S)/de-x-long-s.pat: $(LONG_S)/words-de-x-long-s.hyphenated
	$(CHDIR) $(LONG_S) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LONG_S)/pattern.8 > $@

de-1901-x-long-s: $(LONG_S)/de-1901-x-long-s.pat

$(LONG_S)/words-de-1901-x-long-s.hyphenated: $(LATF)/words-de-1901-Latf.txt
	$(PYTHON) $(LANGSDIR)/long_s_quasihyph.py < $< > $@

$(LONG_S)/de-1901-x-long-s.pat: $(LONG_S)/words-de-1901-x-long-s.hyphenated
	$(CHDIR) $(LONG_S) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LONG_S)/pattern.8 > $@

de-1996-x-long-s: $(LONG_S)/de-1996-x-long-s.pat

$(LONG_S)/words-de-1996-x-long-s.hyphenated: $(LATF)/words-de-1996-Latf.txt
	$(PYTHON) $(LANGSDIR)/long_s_quasihyph.py < $< > $@

$(LONG_S)/de-1996-x-long-s.pat: $(LONG_S)/words-de-1996-x-long-s.hyphenated
	$(CHDIR) $(LONG_S) \
	&& $(SH) $(SCRIPTDIR)/make-full-pattern.sh $(<F) $(DATADIR)/de-Latf.tr
	$(CAT) $(LONG_S)/pattern.8 > $@