1 # -*- coding: utf-8 -*-
3 # (See below for a German introduction.)
5 # This Makefile creates German hyphenation patterns in subdirectories
6 # `$(TRAD)` and `$(REFO)` for traditional and new orthography, respectively.
7 # Hyphenation patterns for traditional Swiss German are generated in
8 # directory `$(SWISS)`.
10 # The input data is expected to be in `$(SRCDIR)`, which by default is set to
11 # the directory containing the Makefile. Output goes to directory
12 # `$(OUTDIR)`, which by default is set to './muster'.
14 # The possible targets are `pattern-trad`, `pattern-refo`, and
15 # `pattern-swiss`. If no target (or target `all`) is given, all patterns for
16 # all three targets are built.
21 # mkdir build-patterns
23 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
26 # If you add one of the (phony) targets `major`, `fugen`, or `suffix`,
27 # patterns that only use major hyphenation points ('Haupttrennstellen') are
28 # created. If you add the (phony) target `gesang`, patterns usable for
29 # German lyrics are created. Example:
32 # make major pattern-refo
35 # The output directories have `-major` (etc.) appended to their names. Note
36 # that the `major`, `fugen`, and `suffix` targets reflect the markup in the
37 # `wortliste` file; they are not intended to produce patterns for 'good'
38 # hyphenation in text but rather to test the consistency of the markup, and
39 # to assist in adding new words.
41 # To control the used weights in the major hyphenation patterns, add variable
42 # `W=N`, where `N` gives the quality: value 1 specifies the best hyphenation
43 # points only, value 2 both the best and second-best points, etc. The
44 # default is value 0, using all major hyphenation points.
46 # The targets `de-Latf`, `de-Latf-1901`, and `de-Latf-1996` create
47 # (experimental) patterns and word lists for converting words in traditional
48 # and new orthography that make a distinction between long and round S as
49 # needed for typesetting with Fraktur fonts. An example for applying these
50 # patterns is the script `skripte/python/patuse/long_s_conversion.py`.
52 # The targets `de_ligaturaufbruch`, `de-1901_ligaturaufbruch`, and
53 # `de-1996_ligaturaufbruch` create (experimental) patterns and word lists for
# 'breaking up' ligatures: The quasi-hyphenation points indicate positions
# where ligatures like 'fl' must not occur.
58 # Dieses Makefile erzeugt deutsche Trennmuster in den Unterverzeichnissen
59 # `$(TRAD)` und `$(REFO)` für die traditionelle bzw. reformierte
# Rechtschreibung. Trennmuster für traditionelles deutschschweizerisches
# Deutsch werden im Verzeichnis `$(SWISS)` erzeugt.
63 # Die Eingabedaten werden im Verzeichnis `$(SRCDIR)` erwartet, welches
# standardmäßig identisch zu dem Verzeichnis ist, welches die
65 # `Makefile`-Datei enthält. Die Ausgabe wird in Verzeichnis `$(OUTDIR)`
66 # erzeugt, welches standardmäßig den Namen './muster' hat.
68 # Die möglichen Make-Ziele sind `pattern-trad`, `pattern-refo` und
69 # `pattern-swiss`. Wenn kein Ziel angegeben ist (oder man das Ziel `all`
70 # verwendet), werden alle drei Trennmuster erzeugt.
75 # mkdir build-patterns
77 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
80 # Wird eines der zusätzlichen (künstlichen) Ziele `major`, `fugen` oder
81 # `suffix` angegeben, werden Haupttrennstellmuster erzeugt. Wird das
82 # (künstliche) Ziel `gesang` angegeben, werden Muster mit
83 # Gesangstrennstellen erzeugt.
88 # make major pattern-refo
91 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings mit
92 # einem angehängten `-major`, `-fugen`, `-suffix` bzw. `-gesang`.
94 # Die Haupttrennstellmuster spiegeln die Auszeichnung in der Liste direkt
95 # wider. Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen,
96 # sondern sind zum Testen der Konsistenz der Auszeichnung sowie zum
97 # "kategorisierten" Markieren der Trennstellen neuer Wörter gedacht.
99 # Bei `major` kann die Menge der verwendeten Haupttrennstellen mittels der
# Variable `W=N` (Wichtungs-Schwellwert) kontrolliert werden, wo `N` die
101 # Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen, Wert 2
102 # die besten und zweitbesten Haupttrennstellen usw. Der Standardwert für `W`
103 # ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden sollen.
105 # Die Ziele `de-Latf`, `de-Latf-1901` und `de-Latf-1996` erzeugen
106 # (experimentelle) Wortlisten und (Quasi-) Trennmuster für die Wandlung von
107 # Wörtern in traditioneller oder reformierter Standardorthographie in der
# Variante mit Unterscheidung von langem und rundem S (Binnen-S vs.
109 # Schluß-S), wie sie im Satz mit gebrochenen Schriften benötigt wird. Ein
110 # Beispiel für die Anwendung dieser Muster ist das Skript
111 # `skripte/python/patuse/long_s_conversion.py`.
# Die Ziele `de_ligaturaufbruch`, `de-1901_ligaturaufbruch` und
114 # `de-1996_ligaturaufbruch` erzeugen (experimentelle) Wortlisten und
115 # Trennmuster, um Ligaturen 'aufzubrechen': Die Quasi-Trennstellen zeigen an,
116 # wo Ligaturen wie 'fl' nicht auftreten dürfen.
# Locations of the input data; everything is derived from `SRCDIR`.
# `SRCDIR` is the directory of the makefile that was read last (taken from
# `MAKEFILE_LIST`).
SRCDIR    = $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATADIR   = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
LANGSDIR  = $(SCRIPTDIR)/spezialmuster/lang_s
WORDLIST  = $(SRCDIR)/wortliste
# Modifier pseudo-targets: they build nothing themselves but are looked up
# in `MAKECMDGOALS` to select the kind of patterns to create.
.PHONY: major fugen suffix gesang
135 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
137 # A single `-` gets removed; all other combinations of `-`, `<`, `>`, and
138 # `=` are converted to a hyphen.
139 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
142 -e
's/[=<>][=<>]*/-/g'
143 PERL_PATTYPE
= -g
$(W
) -1 -U
145 ifeq ($(words $(MAKECMDGOALS
)),1)
148 # This is to suppress the 'nothing to be done' warning.
152 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
154 # All combinations of `-`, `<`, `>`, `<=`, `=>` get removed, runs of `=`
155 # are converted to a hyphen.
156 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
160 -e
's/[<>][<>]*//g' \
162 PERL_PATTYPE
= -g
$(W
) -1 -U
164 ifeq ($(words $(MAKECMDGOALS
)),1)
167 # This is to suppress the 'nothing to be done' warning.
171 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
173 # All combinations of `-`, `<`, `=` get removed, runs of `>` are converted
175 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
177 -e
's/[<=][<=]*//g' \
179 PERL_PATTYPE
= -g
$(W
) -1 -U
181 ifeq ($(words $(MAKECMDGOALS
)),1)
184 # This is to suppress the 'nothing to be done' warning.
188 else ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
193 ifeq ($(words $(MAKECMDGOALS
)),1)
196 # This is to suppress the 'nothing to be done' warning.
# Base names of the three pattern sets; they are used both for the output
# subdirectories and for the generated files.  `PATTYPE` is appended
# verbatim.
TRAD  = dehypht-x$(PATTYPE)
REFO  = dehyphn-x$(PATTYPE)
SWISS = dehyphts-x$(PATTYPE)
# Output directory for the ligature break-up word lists and patterns.
LIGA = $(OUTDIR)/ligaturaufbruch
212 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date (YYYY-MM-DD); it is part of the generated file names and gets
# substituted for `@DATE@`.  The variable is assigned with `:=` so that
# `date` runs exactly once while the makefile is read.  With a recursive
# `=`, every expansion would start a new `date` process, and a build running
# past midnight could end up with target names and file contents that
# disagree.  A `DATE=...` given on the command line still takes precedence.
DATE := $(shell date '+%Y-%m-%d')
# Command prefix for running the `sprachauszug.py` script with `$(PYTHON)`.
SPRACHAUSZUG = $(PYTHON) $(SCRIPTDIR)/wortliste/sprachauszug.py
# Filter for word lists: `sort -d` piped into `uniq -i`, both started with
# the environment settings from `LC_ENVVARS`.  `$(bsnl)` merely breaks the
# line in the command as echoed by make.
SORT = $(LC_ENVVARS) sort -d $(bsnl)\
       | $(LC_ENVVARS) uniq -i
231 ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
232 GERMAN_TR
= $(DATADIR
)/german-gesang.tr
236 GERMAN_TR
= $(DATADIR
)/german.tr
238 $(strip $(shell $(SED
) 's/^\(..\).*/\1/;q' < $(GERMAN_TR
)))
240 $(strip $(shell $(SED
) 's/^..\(..\).*/\1/;q' < $(GERMAN_TR
)))
# Backquoted shell snippet that changes to `SRCDIR` and prints the `%H`
# field of the latest `HEAD` log entry (the commit hash, assuming `GIT` is
# git).  `:=` only expands the make variables here; the command itself is
# run by the shell of whatever recipe uses `$(GIT_VERSION)`.
GIT_VERSION := `$(CHDIR) $(SRCDIR) \
                && $(GIT) log --format=%H -1 HEAD --`
# Output directory of each pattern set.
TRADDIR  = $(OUTDIR)/$(TRAD)
REFODIR  = $(OUTDIR)/$(REFO)
SWISSDIR = $(OUTDIR)/$(SWISS)

# Final products of each pattern set: the dated `.pat` and `.tex` files.
TRADFILES  = $(addprefix $(TRADDIR)/$(TRAD)-$(DATE),.pat .tex)
REFOFILES  = $(addprefix $(REFODIR)/$(REFO)-$(DATE),.pat .tex)
SWISSFILES = $(addprefix $(SWISSDIR)/$(SWISS)-$(DATE),.pat .tex)
253 # This macro defines a backslash followed by a newline. We use it to
254 # beautify canned recipes, avoiding overlong lines in the make output.
260 # A comma cannot be part of an argument of `call`; we have to use a variable
# Replace `SRCDIR` by whatever `cd $(SRCDIR) && $(PWD)` prints.  `override`
# makes this assignment win even if `SRCDIR` was set on the command line.
# NOTE(review): `PWD` is presumably defined elsewhere in this file as the
# `pwd` command -- confirm.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# Building `all` creates the patterns for all three orthographies.
all: pattern-trad pattern-refo pattern-swiss

# Short names for the final files of each pattern set.
.PHONY: pattern-trad pattern-refo pattern-swiss
pattern-trad:  $(TRADFILES)
pattern-refo:  $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# Targets for creating only the hyphenated word lists, i.e., the input of
# the pattern generation, without building the patterns themselves.
.PHONY: words-trad words-refo words-swiss
words-trad:  $(TRADDIR)/words.hyphenated.trad
words-refo:  $(REFODIR)/words.hyphenated.refo
# The Swiss word list is generated in `SWISSDIR`; there is no rule for a
# file of that name in `REFODIR`.
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
288 .PHONY
: pre-trad pre-refo pre-swiss
# The `pre-*` targets are order-only prerequisites (listed after `|`): they
# are processed before the files below get built, but they never cause
# these files to be considered out of date.
$(TRADFILES) $(TRADDIR)/words.hyphenated.trad: | pre-trad
$(REFOFILES) $(REFODIR)/words.hyphenated.refo: | pre-refo
$(SWISSFILES) $(SWISSDIR)/words.hyphenated.swiss: | pre-swiss
# GNU make can create several targets with a single invocation of a recipe
# only in pattern rules.  We thus use a 'sentinel file' (with 'echo'
# providing the time stamp): `pattern.8` and `pattern.rules` have no recipe
# of their own but depend on the `make-full-pattern-*` file of their
# directory.
$(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules: \
  $(TRADDIR)/make-full-pattern-trad
$(REFODIR)/pattern.8 $(REFODIR)/pattern.rules: \
  $(REFODIR)/make-full-pattern-refo
$(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules: \
  $(SWISSDIR)/make-full-pattern-swiss
309 $(DATADIR
)/german-gesang.tr
: $(DATADIR
)/german.tr
314 # $(1): pattern directory
315 define make-full-pattern
316 $(CHDIR
) $(1) $(bsnl
)\
317 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
322 # Both `make-full-pattern.sh` and `german.tr` control hyphenation parameters;
323 # it is thus a good idea to make them prerequisites.
324 $(TRADDIR
)/make-full-pattern-trad \
325 $(REFODIR
)/make-full-pattern-refo \
326 $(SWISSDIR
)/make-full-pattern-swiss
: \
327 $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh \
# Run the `make-full-pattern` macro for each pattern set, passing the set's
# output directory.  The targets are the sentinel files that `pattern.8`
# and `pattern.rules` depend on; each one is remade whenever the
# hyphenated word list of its set changes.
$(TRADDIR)/make-full-pattern-trad: $(TRADDIR)/words.hyphenated.trad
	$(call make-full-pattern,$(TRADDIR))

$(REFODIR)/make-full-pattern-refo: $(REFODIR)/words.hyphenated.refo
	$(call make-full-pattern,$(REFODIR))

$(SWISSDIR)/make-full-pattern-swiss: $(SWISSDIR)/words.hyphenated.swiss
	$(call make-full-pattern,$(SWISSDIR))
337 # $(1): pattern file name
338 # $(2): pattern directory
340 $(CAT
) $(DATADIR
)/$(1).1 $(bsnl
)\
341 |
$(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
342 -e
"s/@GIT_VERSION@/$(GIT_VERSION)/" $(bsnl
)\
343 -e
"s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" $(bsnl
)\
344 -e
"s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@
$(bsnl
)\
345 && $(CAT
) $(2)/pattern.rules
>> $@
$(bsnl
)\
346 && $(CAT
) $(DATADIR
)/$(TRAD
).2 >> $@
$(bsnl
)\
347 && $(CAT
) $(2)/pattern
.8 >> $@
$(bsnl
)\
348 && $(CAT
) $(DATADIR
)/$(TRAD
).3 >> $@
# Assemble the final, dated pattern file of each set with the
# `make-pat-file` macro (arguments: pattern file name, pattern directory)
# once `pattern.8` and `pattern.rules` of that set are available.
$(TRADDIR)/$(TRAD)-$(DATE).pat: \
  $(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules
	$(call make-pat-file,$(TRAD),$(TRADDIR))

$(REFODIR)/$(REFO)-$(DATE).pat: \
  $(REFODIR)/pattern.8 $(REFODIR)/pattern.rules
	$(call make-pat-file,$(REFO),$(REFODIR))

$(SWISSDIR)/$(SWISS)-$(DATE).pat: \
  $(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules
	$(call make-pat-file,$(SWISS),$(SWISSDIR))
358 # $(1): arguments for `extract-tex.pl`
361 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
$(1) $(PERL_PATTYPE
) $(bsnl
)\
362 |
$(SED_PATTYPE
) $(bsnl
)\
367 $(TRADDIR
)/words.hyphenated.trad
: $(WORDLIST
)
368 $(call extract-tex
,-t
)
369 $(REFODIR
)/words.hyphenated.refo
: $(WORDLIST
)
371 $(SWISSDIR
)/words.hyphenated.swiss
: $(WORDLIST
)
372 $(call extract-tex
,-s
)
375 $(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
379 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
381 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
383 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
388 # patterns for handling Fraktur
391 # Word lists and patterns 'de-Latf' (i.e., German, Latin script, Fraktur;
392 # orthography for typesetting with Blackletter using round and long 's'):
393 # 'de-1901' (old orthography), 'de-1996' (reformed orthography), and mixed.
394 # The final patterns contain quasi-hyphenation after round 's' ('aus-sagen'
# Entry points for the (experimental) long-s patterns: mixed, traditional,
# and reformed orthography.
.PHONY: de-Latf de-Latf-1901 de-Latf-1996
de-Latf:      $(LATF)/de-Latf.pat
de-Latf-1901: $(LATF)/de-Latf-1901.pat
de-Latf-1996: $(LATF)/de-Latf-1996.pat
402 # $(1): arguments for `-l` parameter of `s2long-s.py`
405 $(PYTHON
) $(LANGSDIR
)/s2long-s.py
--drop-homonyms
-l
$(1) $(bsnl
)\
# Word lists with long and round 's', created by the `s2long-s` macro; its
# argument is the value for the `-l` parameter of `s2long-s.py`.  `$(,)`
# inserts a literal comma, which cannot be written directly in an argument
# of `call`.  The conversion script is a prerequisite, too, so a change to
# it triggers a rebuild.
$(LATF)/words-de-Latf.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901$(,)de-1996")

$(LATF)/words-de-Latf-1901.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901")

$(LATF)/words-de-Latf-1996.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1996")
417 $(PYTHON
) $(LANGSDIR
)/de_Latf_quasihyph.py
$(bsnl
)\
421 $(LATF
)/words-de-Latf.hyphenated
: $(LATF
)/words-de-Latf.txt
423 $(LATF
)/words-de-Latf-1901.hyphenated
: $(LATF
)/words-de-Latf-1901.txt
425 $(LATF
)/words-de-Latf-1996.hyphenated
: $(LATF
)/words-de-Latf-1996.txt
428 define make-full-latf-pattern
429 $(CHDIR
) $(LATF
) $(bsnl
)\
430 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
431 $(<F
) $(DATADIR
)/de-Latf.tr
432 $(CAT
) $(LATF
)/pattern
.8 >> $@
# Create the long-s (quasi-hyphenation) pattern files, one per orthography,
# from the corresponding hyphenated word list.
$(LATF)/de-Latf.pat: $(LATF)/words-de-Latf.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1901.pat: $(LATF)/words-de-Latf-1901.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1996.pat: $(LATF)/words-de-Latf-1996.hyphenated
	$(call make-full-latf-pattern)
444 # patterns for 'breaking up' typographic ligatures
447 # Word lists and patterns 'de_ligaturaufbruch': 'de-1901' (old orthography),
# 'de-1996' (reformed orthography), and mixed. The final patterns contain
449 # quasi-hyphenation to indicate positions where ligatures like 'fl' must not
450 # occur ('Dorfladen' => 'Dorf-laden').
# Entry points for the (experimental) ligature break-up patterns: mixed,
# traditional, and reformed orthography.
.PHONY: de_ligaturaufbruch de-1901_ligaturaufbruch de-1996_ligaturaufbruch
de_ligaturaufbruch:      $(LIGA)/de_ligaturaufbruch-$(DATE).pat
de-1901_ligaturaufbruch: $(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat
de-1996_ligaturaufbruch: $(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat
457 # $(1): arguments for `-l` parameter of `sprachauszug.py`
458 define ligaturaufbruch-eingabe
460 $(SPRACHAUSZUG
) -l
$(1) -s
"morphemgrenzen,einfach" $(bsnl
)\
# Input word lists for the ligature break-up patterns, created by the
# `ligaturaufbruch-eingabe` macro; its argument is the value for the `-l`
# parameter of `sprachauszug.py`.  `$(,)` inserts a literal comma, which
# cannot be written directly in an argument of `call`.
$(LIGA)/de_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901$(,)de-1996$(,)de-CH-1996")

$(LIGA)/de-1901_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901")

$(LIGA)/de-1996_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1996$(,)de-CH-1996")
471 define make-full-liga-pattern
472 $(CHDIR
) $(LIGA
) $(bsnl
)\
473 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
474 $(<F
) $(DATADIR
)/de-Latf.tr
475 $(CAT
) $(LIGA
)/pattern
.8 >> $@
# Create the dated ligature break-up pattern files, one per orthography,
# from the corresponding `.eingabe` word list.
$(LIGA)/de_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1901_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1996_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)
491 # Exzerpte mit `sprachauszug.py`
493 exzerpte
/de-1996_morphemgrenzen
:
495 $(SPRACHAUSZUG
) -l de-1996 \
496 -s
"morphemgrenzen,einfach" < $(WORDLIST
) > $@
497 exzerpte
/de-1901_morphemgrenzen
:
499 $(SPRACHAUSZUG
) -l de-1901 \
500 -s
"morphemgrenzen,einfach" < $(WORDLIST
) > $@
501 exzerpte
/de-1996_hyphenmin3
:
503 $(SPRACHAUSZUG
) -l
"de-1996,de-1996-x-versal" \
504 -s
"standard,morphemisch,hyphenmin3,einfach" < $(WORDLIST
) > $@
505 exzerpte
/de-1996_gesangstext
:
507 $(SPRACHAUSZUG
) -l
"de-1996,de-1996-x-versal" \
508 -s
"syllabisch,gesangstext,einfach" < $(WORDLIST
) > $@