1 # -*- coding: utf-8 -*-
3 # (See below for a German introduction.)
5 # This Makefile creates German hyphenation patterns in subdirectories
6 # `$(TRAD)` and `$(REFO)` for traditional and new orthography, respectively.
7 # Hyphenation patterns for traditional Swiss German are generated in
8 # directory `$(SWISS)`.
10 # The input data is expected to be in `$(SRCDIR)`, which by default is set to
11 # the directory containing the Makefile. Output goes to directory
12 # `$(OUTDIR)`, which by default is set to './muster'.
14 # The possible targets are `pattern-trad`, `pattern-refo`, and
15 # `pattern-swiss`. If no target (or target `all`) is given, all patterns for
16 # all three targets are built.
21 # mkdir build-patterns
23 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
26 # If you add one of the (phony) targets `major`, `fugen`, or `suffix`,
27 # patterns that only use major hyphenation points ('Haupttrennstellen') are
31 # make major pattern-refo
34 # The output directories have `-major` (etc.) appended to their names. Note
35 # that these special patterns reflect the markup in the `wortliste` file;
36 # they are not intended to produce patterns for 'good' hyphenation in text
37 # but rather to test the consistency of the markup, and to assist in adding
40 # To control the used weights in the major hyphenation patterns, add variable
41 # `W=N`, where `N` gives the quality: value 1 specifies the best hyphenation
42 # points only, value 2 both the best and second-best points, etc. The
43 # default is value 0, using all major hyphenation points.
45 # The targets `de-Latf`, `de-Latf-1901`, and `de-Latf-1996` create
46 # (experimental) patterns and word lists for converting words in traditional
47 # and new orthography that make a distinction between long and round S as
48 # needed for typesetting with Fraktur fonts. An example for applying these
49 # patterns is the script `skripte/python/patuse/long_s_conversion.py`.
51 # The targets `de_ligaturaufbruch`, `de-1901_ligaturaufbruch`, and
52 # `de-1996_ligaturaufbruch` create (experimental) patterns and word lists for
53 # 'breaking up' ligatures: The quasi-hyphenation points indicate positions where
54 # ligatures like 'fl' must not occur.
57 # Dieses Makefile erzeugt deutsche Trennmuster in den Unterverzeichnissen
58 # `$(TRAD)` und `$(REFO)` für die traditionelle bzw. reformierte
59 # Rechtschreibung. Trennmuster für traditionelles deutschschweizerisches
60 # Deutsch werden im Verzeichnis `$(SWISS)` erzeugt.
62 # Die Eingabedaten werden im Verzeichnis `$(SRCDIR)` erwartet, welches
63 # standardmäßig identisch zu dem Verzeichnis ist, welches die
64 # `Makefile`-Datei enthält. Die Ausgabe wird in Verzeichnis `$(OUTDIR)`
65 # erzeugt, welches standardmäßig den Namen './muster' hat.
67 # Die möglichen Make-Ziele sind `pattern-trad`, `pattern-refo` und
68 # `pattern-swiss`. Wenn kein Ziel angegeben ist (oder man das Ziel `all`
69 # verwendet), werden alle drei Trennmuster erzeugt.
74 # mkdir build-patterns
76 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
79 # Wird eines der zusätzlichen (künstlichen) Ziele `major`, `fugen` oder
80 # `suffix` angegeben, werden Haupttrennstellmuster erzeugt.
85 # make major pattern-refo
88 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings mit
89 # einem angehängten `-major`, `-fugen` bzw. `-suffix`.
91 # Diese Spezialmuster spiegeln die Auszeichnung in der Liste direkt wider.
92 # Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen, sondern
93 # sind zum Testen der Konsistenz der Auszeichnung sowie zum "kategorisierten"
94 # Markieren der Trennstellen neuer Wörter gedacht.
96 # Bei `major` kann die Menge der verwendeten Haupttrennstellen mittels der
97 # Variable `W=N` (Wichtungs-Schwellwert) kontrolliert werden, wobei `N` die
98 # Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen, Wert 2
99 # die besten und zweitbesten Haupttrennstellen usw. Der Standardwert für `W`
100 # ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden sollen.
102 # Die Ziele `de-Latf`, `de-Latf-1901` und `de-Latf-1996` erzeugen
103 # (experimentelle) Wortlisten und (Quasi-) Trennmuster für die Wandlung von
104 # Wörtern in traditioneller oder reformierter Standardorthographie in der
105 # Variante mit Unterscheidung von langem und rundem S (Binnen-S vs.
106 # Schluß-S), wie sie im Satz mit gebrochenen Schriften benötigt wird. Ein
107 # Beispiel für die Anwendung dieser Muster ist das Skript
108 # `skripte/python/patuse/long_s_conversion.py`.
110 # Die Ziele `de_ligaturaufbruch`, `de-1901_ligaturaufbruch` und
111 # `de-1996_ligaturaufbruch` erzeugen (experimentelle) Wortlisten und
112 # Trennmuster, um Ligaturen 'aufzubrechen': Die Quasi-Trennstellen zeigen an,
113 # wo Ligaturen wie 'fl' nicht auftreten dürfen.
# Locations of the input data.  Everything hangs off `SRCDIR`; the
# assignments are recursive (`=`), so the derived paths follow any later
# redefinition of `SRCDIR`.

# Directory (symlinks resolved) of the last entry in `MAKEFILE_LIST`.
# Note that `$(dir ...)` leaves a trailing slash on the value.
SRCDIR = $(dir $(realpath $(lastword $(MAKEFILE_LIST))))

DATADIR   = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
LANGSDIR  = $(SCRIPTDIR)/spezialmuster/lang_s
WORDLIST  = $(SRCDIR)/wortliste
# `major`, `fugen`, and `suffix` are goals without a file of the same name.
.PHONY: major fugen suffix
132 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
134 # A single `-` gets removed; all other combinations of `-`, `<`, `>`, and
135 # `=` are converted to a hyphen.
136 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
139 -e
's/[=<>][=<>]*/-/g'
140 PERL_PATTYPE
= -g
$(W
)
142 ifeq ($(words $(MAKECMDGOALS
)),1)
145 # This is to suppress the 'nothing to be done' warning.
149 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
151 # All combinations of `-`, `<`, `>`, `<=`, `=>` get removed, runs of `=`
152 # are converted to a hyphen.
153 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
157 -e
's/[<>][<>]*//g' \
159 PERL_PATTYPE
= -g
$(W
)
161 ifeq ($(words $(MAKECMDGOALS
)),1)
164 # This is to suppress the 'nothing to be done' warning.
168 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
170 # All combinations of `-`, `<`, `=` get removed, runs of `>` are converted
172 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
174 -e
's/[<=][<=]*//g' \
176 PERL_PATTYPE
= -g
$(W
)
178 ifeq ($(words $(MAKECMDGOALS
)),1)
181 # This is to suppress the 'nothing to be done' warning.
# Base names of the three pattern sets; the value of `PATTYPE` is appended
# without a separator.
TRAD  = dehypht-x$(PATTYPE)
REFO  = dehyphn-x$(PATTYPE)
SWISS = dehyphts-x$(PATTYPE)

# Output directory for the ligature break-up patterns.
LIGA = $(OUTDIR)/ligaturaufbruch
197 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date as YYYY-MM-DD; it is part of the names of the generated files.
# The assignment is simply expanded (`:=`) so that `date` is run exactly
# once while the makefile is read.  With a recursive assignment the command
# would be re-run on every expansion, and a build running across midnight
# could end up with target names that no longer match each other.
DATE := $(shell date '+%Y-%m-%d')
# Command prefix that runs the `sprachauszug.py` script with the
# interpreter given by `PYTHON`.
SPRACHAUSZUG = $(PYTHON) $(SCRIPTDIR)/wortliste/sprachauszug.py
# Sort filter: `sort -d` piped into `uniq -i`, both started with the
# environment assignments from `LC_ENVVARS`.
SORT = $(LC_ENVVARS) sort -d | $(LC_ENVVARS) uniq -i
# Hyphenation minima, read from the first line of `german.tr`: characters
# one and two give `LEFTHYPHENMIN`, characters three and four give
# `RIGHTHYPHENMIN`.  The `q` command makes `sed` stop after that line;
# `$(strip ...)` removes surrounding white space from the result.
LEFTHYPHENMIN = $(strip $(shell \
  $(SED) 's/^\(..\).*/\1/;q' < $(DATADIR)/german.tr))
RIGHTHYPHENMIN = $(strip $(shell \
  $(SED) 's/^..\(..\).*/\1/;q' < $(DATADIR)/german.tr))
# Hash of the current git commit of the source directory.  The value is a
# backquoted shell command, not its output: make expands `CHDIR`, `SRCDIR`,
# and `GIT` right here (`:=`), and the shell performs the command
# substitution wherever this text ends up in a command line.
GIT_VERSION := `$(CHDIR) $(SRCDIR) && $(GIT) log --format=%H -1 HEAD --`
# Output directory of each pattern set.
TRADDIR  = $(OUTDIR)/$(TRAD)
REFODIR  = $(OUTDIR)/$(REFO)
SWISSDIR = $(OUTDIR)/$(SWISS)

# Files requested by the `pattern-*` goals: for every pattern set the dated
# `.pat` file and the dated `.tex` file in its output directory.
TRADFILES  = $(addprefix $(TRADDIR)/$(TRAD)-$(DATE),.pat .tex)
REFOFILES  = $(addprefix $(REFODIR)/$(REFO)-$(DATE),.pat .tex)
SWISSFILES = $(addprefix $(SWISSDIR)/$(SWISS)-$(DATE),.pat .tex)
229 # This macro defines a backslash followed by a newline. We use it to
230 # beautify canned recipes, avoiding overlong lines in the make output.
236 # A comma cannot be part of an argument of `call`; we have to use a variable
# Replace `SRCDIR` by whatever the command in `PWD` prints after changing
# into that directory (presumably `pwd`; the definition of `PWD` is not
# part of this section).  `override` makes the assignment win even over a
# value given on the command line.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# `all` builds the patterns of all three pattern sets.
all: pattern-trad pattern-refo pattern-swiss

# Each `pattern-*` goal stands for the final files of one pattern set.
.PHONY: pattern-trad pattern-refo pattern-swiss
pattern-trad:  $(TRADFILES)
pattern-refo:  $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# The `words-*` goals only create the hyphenated word lists.  Each one
# depends on the list in the output directory of its own pattern set; the
# Swiss list is built in `$(SWISSDIR)` (there is no rule for a file of
# that name in `$(REFODIR)`).
.PHONY: words-trad words-refo words-swiss
words-trad:  $(TRADDIR)/words.hyphenated.trad
words-refo:  $(REFODIR)/words.hyphenated.refo
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
.PHONY: pre-trad pre-refo pre-swiss

# The `pre-*` goals are order-only prerequisites (listed after `|`): they
# are brought up to date before the goals on the left, but they never
# cause those goals to be considered out of date.
pattern-trad  words-trad:  | pre-trad
pattern-refo  words-refo:  | pre-refo
pattern-swiss words-swiss: | pre-swiss
# Only pattern rules let GNU make create several targets with a single
# run of a recipe.  We therefore go through a 'sentinel file' (its time
# stamp is written with 'echo'): `pattern.8` and `pattern.rules` simply
# depend on the sentinel of their pattern set.

$(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules: \
  $(TRADDIR)/make-full-pattern-trad

$(REFODIR)/pattern.8 $(REFODIR)/pattern.rules: \
  $(REFODIR)/make-full-pattern-refo

$(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules: \
  $(SWISSDIR)/make-full-pattern-swiss
285 # $(1): pattern directory
286 define make-full-pattern
287 $(CHDIR
) $(1) $(bsnl
)\
288 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
289 $(<F
) $(DATADIR
)/german.tr
293 # Both `make-full-pattern.sh` and `german.tr` control hyphenation parameters;
294 # it is thus a good idea to make them prerequisites.
295 $(TRADDIR
)/make-full-pattern-trad \
296 $(REFODIR
)/make-full-pattern-refo \
297 $(SWISSDIR
)/make-full-pattern-swiss
: \
298 $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh \
# Sentinel files: each one is remade from the hyphenated word list of its
# pattern set by running the canned recipe `make-full-pattern` on the
# corresponding output directory.

$(TRADDIR)/make-full-pattern-trad: $(TRADDIR)/words.hyphenated.trad
	$(call make-full-pattern,$(TRADDIR))

$(REFODIR)/make-full-pattern-refo: $(REFODIR)/words.hyphenated.refo
	$(call make-full-pattern,$(REFODIR))

$(SWISSDIR)/make-full-pattern-swiss: $(SWISSDIR)/words.hyphenated.swiss
	$(call make-full-pattern,$(SWISSDIR))
308 # $(1): pattern file name
309 # $(2): pattern directory
311 $(CAT
) $(DATADIR
)/$(1).1 $(bsnl
)\
312 |
$(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
313 -e
"s/@GIT_VERSION@/$(GIT_VERSION)/" $(bsnl
)\
314 -e
"s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" $(bsnl
)\
315 -e
"s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@
$(bsnl
)\
316 && $(CAT
) $(2)/pattern.rules
>> $@
$(bsnl
)\
317 && $(CAT
) $(DATADIR
)/$(TRAD
).2 >> $@
$(bsnl
)\
318 && $(CAT
) $(2)/pattern
.8 >> $@
$(bsnl
)\
319 && $(CAT
) $(DATADIR
)/$(TRAD
).3 >> $@
# The dated `.pat` file of each pattern set is assembled by the canned
# recipe `make-pat-file` (arguments: pattern file name, pattern directory)
# once `pattern.8` and `pattern.rules` of that set are available.

$(TRADDIR)/$(TRAD)-$(DATE).pat: \
  $(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules
	$(call make-pat-file,$(TRAD),$(TRADDIR))

$(REFODIR)/$(REFO)-$(DATE).pat: \
  $(REFODIR)/pattern.8 $(REFODIR)/pattern.rules
	$(call make-pat-file,$(REFO),$(REFODIR))

$(SWISSDIR)/$(SWISS)-$(DATE).pat: \
  $(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules
	$(call make-pat-file,$(SWISS),$(SWISSDIR))
329 # $(1): arguments for `extract-tex.pl`
332 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
$(1) $(PERL_PATTYPE
) $(bsnl
)\
333 |
$(SED_PATTYPE
) $(bsnl
)\
# Hyphenated word lists, extracted from the master word list with the
# canned recipe `extract-tex`; its argument is the option list handed to
# `extract-tex.pl` and is the only thing that differs between the sets.

$(TRADDIR)/words.hyphenated.trad: $(WORDLIST)
	$(call extract-tex,-t -1 -U)

$(REFODIR)/words.hyphenated.refo: $(WORDLIST)
	$(call extract-tex,-1 -U)

$(SWISSDIR)/words.hyphenated.swiss: $(WORDLIST)
	$(call extract-tex,-s -1 -U)
345 $(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
349 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
351 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
353 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
358 # patterns for handling Fraktur
361 # Word lists and patterns 'de-Latf' (i.e., German, Latin script, Fraktur;
362 # orthography for typesetting with Blackletter using round and long 's'):
363 # 'de-1901' (old orthography), 'de-1996' (reformed orthography), and mixed.
364 # The final patterns contain quasi-hyphenation after round 's' ('aus-sagen'
# Convenience goals for the Fraktur (long-s) patterns; each one stands for
# the pattern file of the same name in `$(LATF)`.
.PHONY: de-Latf de-Latf-1901 de-Latf-1996
de-Latf:      $(LATF)/de-Latf.pat
de-Latf-1901: $(LATF)/de-Latf-1901.pat
de-Latf-1996: $(LATF)/de-Latf-1996.pat
372 # $(1): arguments for `-l` parameter of `s2long-s.py`
375 $(PYTHON
) $(LANGSDIR
)/s2long-s.py
--drop-homonyms
-l
$(1) $(bsnl
)\
# Word lists with long-s spelling.  They are remade whenever the master
# word list or the conversion script changes; the argument of `s2long-s`
# is the value for the `-l` option of `s2long-s.py`.  `$(,)` stands for a
# comma, which cannot be written literally inside an argument of `call`.

$(LATF)/words-de-Latf.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901$(,)de-1996")

$(LATF)/words-de-Latf-1901.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901")

$(LATF)/words-de-Latf-1996.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1996")
387 $(PYTHON
) $(LANGSDIR
)/de_Latf_quasihyph.py
$(bsnl
)\
391 $(LATF
)/words-de-Latf.hyphenated
: $(LATF
)/words-de-Latf.txt
393 $(LATF
)/words-de-Latf-1901.hyphenated
: $(LATF
)/words-de-Latf-1901.txt
395 $(LATF
)/words-de-Latf-1996.hyphenated
: $(LATF
)/words-de-Latf-1996.txt
398 define make-full-latf-pattern
399 $(CHDIR
) $(LATF
) $(bsnl
)\
400 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
401 $(<F
) $(DATADIR
)/de-Latf.tr
402 $(CAT
) $(LATF
)/pattern
.8 >> $@
# Fraktur pattern files: each one is produced from its quasi-hyphenated
# word list by the canned recipe `make-full-latf-pattern`.

$(LATF)/de-Latf.pat: $(LATF)/words-de-Latf.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1901.pat: $(LATF)/words-de-Latf-1901.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1996.pat: $(LATF)/words-de-Latf-1996.hyphenated
	$(call make-full-latf-pattern)
414 # patterns for 'breaking up' typographic ligatures
417 # Word lists and patterns 'de_ligaturaufbruch': 'de-1901' (old orthography),
418 # 'de-1996' (reformed orthography), and mixed. The final patterns contain
419 # quasi-hyphenation to indicate positions where ligatures like 'fl' must not
420 # occur ('Dorfladen' => 'Dorf-laden').
# Convenience goals for the ligature break-up patterns; each one stands
# for the dated pattern file of the same name in `$(LIGA)`.
.PHONY: de_ligaturaufbruch de-1901_ligaturaufbruch de-1996_ligaturaufbruch
de_ligaturaufbruch:      $(LIGA)/de_ligaturaufbruch-$(DATE).pat
de-1901_ligaturaufbruch: $(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat
de-1996_ligaturaufbruch: $(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat
427 # $(1): arguments for `-l` parameter of `sprachauszug.py`
428 define ligaturaufbruch-eingabe
430 $(SPRACHAUSZUG
) -l
$(1) -s
"morphemgrenzen,einfach" $(bsnl
)\
# Input word lists for the ligature break-up patterns, made from the
# master word list.  The argument of `ligaturaufbruch-eingabe` is the
# value for the `-l` option of `sprachauszug.py`; `$(,)` stands for a
# comma, which cannot be written literally inside an argument of `call`.

$(LIGA)/de_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901$(,)de-1996$(,)de-CH-1996")

$(LIGA)/de-1901_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901")

$(LIGA)/de-1996_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1996$(,)de-CH-1996")
441 define make-full-liga-pattern
442 $(CHDIR
) $(LIGA
) $(bsnl
)\
443 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
444 $(<F
) $(DATADIR
)/de-Latf.tr
445 $(CAT
) $(LIGA
)/pattern
.8 >> $@
# Dated ligature break-up pattern files: each one is produced from its
# `.eingabe` word list by the canned recipe `make-full-liga-pattern`.

$(LIGA)/de_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1901_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1996_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)
# Excerpts created with `sprachauszug.py`.
#
# Every recipe reads the master word list from standard input and writes
# the excerpt to its target.  The word list is therefore declared as a
# prerequisite: without it an existing excerpt would never be remade after
# a change of the word list.

# Reformed orthography, options "morphemgrenzen,einfach".
exzerpte/de-1996_morphemgrenzen: $(WORDLIST)
	$(SPRACHAUSZUG) -l de-1996 \
	  -s "morphemgrenzen,einfach" < $(WORDLIST) > $@

# Traditional orthography, options "morphemgrenzen,einfach".
exzerpte/de-1901_morphemgrenzen: $(WORDLIST)
	$(SPRACHAUSZUG) -l de-1901 \
	  -s "morphemgrenzen,einfach" < $(WORDLIST) > $@

# Reformed orthography including the `x-versal` variant, options
# "standard,morphemisch,hyphenmin3,einfach".
exzerpte/de-1996_hyphenmin3: $(WORDLIST)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "standard,morphemisch,hyphenmin3,einfach" < $(WORDLIST) > $@

# Reformed orthography including the `x-versal` variant, options
# "syllabisch,gesangstext,einfach".
exzerpte/de-1996_gesangstext: $(WORDLIST)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "syllabisch,gesangstext,einfach" < $(WORDLIST) > $@