1 # -*- coding: utf-8 -*-
3 # (See below for a German introduction.)
5 # This Makefile creates German hyphenation patterns in subdirectories
6 # `$(TRAD)` and `$(REFO)` for traditional and new orthography, respectively.
7 # Hyphenation patterns for traditional Swiss German are generated in
8 # directory `$(SWISS)`.
10 # The input data is expected to be in `$(SRCDIR)`, which by default is set to
11 # the directory containing the Makefile. Output goes to directory
12 # `$(OUTDIR)`, which by default is set to './muster'.
14 # The possible targets are `pattern-trad`, `pattern-refo`, and
15 # `pattern-swiss`. If no target (or target `all`) is given, all patterns for
16 # all three targets are built.
21 # mkdir build-patterns
23 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
26 # If you add one of the (phony) targets `major`, `fugen`, or `suffix`,
27 # patterns that only use major hyphenation points ('Haupttrennstellen') are
28 # created. If you add the (phony) target `gesang`, patterns usable for
29 # German lyrics are created. Example:
32 # make major pattern-refo
35 # The output directories have `-major` (etc.) appended to their names. Note
36 # that the `major`, `fugen`, and `suffix` targets reflect the markup in the
37 # `wortliste` file; they are not intended to produce patterns for 'good'
38 # hyphenation in text but rather to test the consistency of the markup, and
39 # to assist in adding new words.
41 # To control the used weights in the major hyphenation patterns, add variable
42 # `W=N`, where `N` gives the quality: value 1 specifies the best hyphenation
43 # points only, value 2 both the best and second-best points, etc. The
44 # default is value 0, using all major hyphenation points.
46 # The targets `schluss-s`, `schluss-s-1901`, and `schluss-s-1996` create
47 # (experimental) patterns and word lists for converting words in traditional
48 # and new orthography that make a distinction between long and round S as
49 # needed for typesetting with Fraktur fonts. An example for applying these
50 # patterns is the script `skripte/lib/py_patuse/long_s_conversion.py`.
52 # The targets `de_ligaturaufbruch`, `de-1901_ligaturaufbruch`, and
53 # `de-1996_ligaturaufbruch` create (experimental) patterns and word lists for
54 # 'breaking up' ligatures: The "hyphenation" points indicate positions where
55 # ligatures like 'fl' must not occur.
58 # Dieses Makefile erzeugt deutsche Trennmuster in den Unterverzeichnissen
59 # `$(TRAD)` und `$(REFO)` für die traditionelle bzw. reformierte
# Rechtschreibung. Trennmuster für traditionelles deutschschweizerisches
# Deutsch werden im Verzeichnis `$(SWISS)` erzeugt.
63 # Die Eingabedaten werden im Verzeichnis `$(SRCDIR)` erwartet, welches
# standardmäßig identisch zu dem Verzeichnis ist, welches die
65 # `Makefile`-Datei enthält. Die Ausgabe wird in Verzeichnis `$(OUTDIR)`
66 # erzeugt, welches standardmäßig den Namen './muster' hat.
68 # Die möglichen Make-Ziele sind `pattern-trad`, `pattern-refo` und
69 # `pattern-swiss`. Wenn kein Ziel angegeben ist (oder man das Ziel `all`
70 # verwendet), werden alle drei Trennmuster erzeugt.
75 # mkdir build-patterns
77 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
80 # Wird eines der zusätzlichen (künstlichen) Ziele `major`, `fugen` oder
81 # `suffix` angegeben, werden Haupttrennstellmuster erzeugt. Wird das
82 # (künstliche) Ziel `gesang` angegeben, werden Muster mit
83 # Gesangstrennstellen erzeugt.
88 # make major pattern-refo
91 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings mit
92 # einem angehängten `-major`, `-fugen`, `-suffix` bzw. `-gesang`.
94 # Die Haupttrennstellmuster spiegeln die Auszeichnung in der Liste direkt
95 # wider. Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen,
96 # sondern sind zum Testen der Konsistenz der Auszeichnung sowie zum
97 # "kategorisierten" Markieren der Trennstellen neuer Wörter gedacht.
99 # Bei `major` kann die Menge der verwendeten Haupttrennstellen mittels der
# Variable `W=N` (Wichtungs-Schwellwert) kontrolliert werden, wobei `N` die
101 # Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen, Wert 2
102 # die besten und zweitbesten Haupttrennstellen usw. Der Standardwert für `W`
103 # ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden sollen.
105 # Die Ziele `schluss-s`, `schluss-s-1901` und `schluss-s-1996` erzeugen
106 # (experimentelle) Wortlisten und Muster für die Wandlung von
# Wörtern in die Orthographievariante mit Unterscheidung von langem und
# rundem S (Binnen-S vs. Schluß-S), wie sie im Satz mit gebrochenen Schriften
109 # benötigt wird (de-Latf). Ein Beispiel für die Anwendung dieser Muster
110 # ist das Skript `skripte/lib/py_patuse/long_s_conversion.py`.
# Die Ziele `de_ligaturaufbruch`, `de-1901_ligaturaufbruch` und
113 # `de-1996_ligaturaufbruch` erzeugen (experimentelle) Wortlisten und
114 # Trennmuster, um Ligaturen 'aufzubrechen': Die Trennstellen zeigen an,
115 # wo Ligaturen wie 'fl' nicht auftreten dürfen.
# Directory that contains this Makefile.  It is expanded immediately (`:=`):
# `$(MAKEFILE_LIST)` grows with every makefile that gets read, so a deferred
# expansion could resolve to a different file than the one intended here.
SRCDIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))

# Input locations below `$(SRCDIR)`.  They are deliberately left recursively
# expanded (`=`) because `SRCDIR` is re-assigned with `override` further down
# in this file and these paths have to follow that final value.
DATADIR = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
WORDLIST = $(SRCDIR)/wortliste
# Modifier goals named in the introduction; they never correspond to files.
.PHONY: \
  major \
  fugen \
  suffix \
  gesang
134 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
136 # A single `-` gets removed; all other combinations of `-`, `<`, `>`, and
137 # `=` are converted to a hyphen.
138 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
141 -e
's/[=<>][=<>]*/-/g'
142 PERL_PATTYPE
= -g
$(W
) -1 -U
144 ifeq ($(words $(MAKECMDGOALS
)),1)
147 # This is to suppress the 'nothing to be done' warning.
151 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
153 # All combinations of `-`, `<`, `>`, `<=`, `=>` get removed, runs of `=`
154 # are converted to a hyphen.
155 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
159 -e
's/[<>][<>]*//g' \
161 PERL_PATTYPE
= -g
$(W
) -1 -U
163 ifeq ($(words $(MAKECMDGOALS
)),1)
166 # This is to suppress the 'nothing to be done' warning.
170 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
# All combinations of `-`, `<`, `=` get removed, runs of `>` are converted
# to a hyphen.
174 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
176 -e
's/[<=][<=]*//g' \
178 PERL_PATTYPE
= -g
$(W
) -1 -U
180 ifeq ($(words $(MAKECMDGOALS
)),1)
183 # This is to suppress the 'nothing to be done' warning.
187 else ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
192 ifeq ($(words $(MAKECMDGOALS
)),1)
195 # This is to suppress the 'nothing to be done' warning.
# Base names of the three pattern sets (traditional, reformed, and
# traditional Swiss orthography); `$(PATTYPE)` is appended verbatim.
TRAD  = dehypht-x$(PATTYPE)
REFO  = dehyphn-x$(PATTYPE)
SWISS = dehyphts-x$(PATTYPE)
209 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date (YYYY-MM-DD) that becomes part of the output file names and is
# substituted for `@DATE@` by the recipes.  The variable is simply expanded
# so that `date` runs exactly once: with `=` the command is re-run on every
# expansion, and a build that crosses midnight could combine file names
# computed while reading the rules with a different date inside the recipes.
DATE := $(shell date '+%Y-%m-%d')
# Path of `sprachauszug.py`, which the excerpt recipes in this file invoke
# with the options `-l` and `-s`.
SPRACHAUSZUG = $(SCRIPTDIR)/wortliste/sprachauszug.py
# Filter pipeline: `sort -d` followed by `uniq -i`, each prefixed with
# `$(LC_ENVVARS)`.  `$(bsnl)` sits between the two stages so that the
# command is echoed on two lines.
SORT = $(LC_ENVVARS) sort -d $(bsnl) | $(LC_ENVVARS) uniq -i
227 ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
228 GERMAN_TR
= $(DATADIR
)/german-gesang.tr
232 GERMAN_TR
= $(DATADIR
)/german.tr
234 $(strip $(shell $(SED
) 's/^\(..\).*/\1/;q' < $(GERMAN_TR
)))
236 $(strip $(shell $(SED
) 's/^..\(..\).*/\1/;q' < $(GERMAN_TR
)))
# Backtick command that asks `$(GIT)` for the hash (`%H`) of the newest
# commit on HEAD after `$(CHDIR)` has switched to `$(SRCDIR)`.  The variable
# holds the command text, not its output; the command only runs when the
# shell of a recipe evaluates the backticks.
GIT_VERSION := `$(CHDIR) $(SRCDIR) && $(GIT) log --format=%H -1 HEAD --`
# Output directories, one per pattern set.
TRADDIR  = $(OUTDIR)/$(TRAD)
REFODIR  = $(OUTDIR)/$(REFO)
SWISSDIR = $(OUTDIR)/$(SWISS)

# Final products of each pattern set: a dated `.pat` file and a dated `.tex`
# file that share the same stem.
TRADFILES  = $(addprefix $(TRADDIR)/$(TRAD)-$(DATE),.pat .tex)
REFOFILES  = $(addprefix $(REFODIR)/$(REFO)-$(DATE),.pat .tex)
SWISSFILES = $(addprefix $(SWISSDIR)/$(SWISS)-$(DATE),.pat .tex)
249 # This macro defines a backslash followed by a newline. We use it to
250 # beautify canned recipes, avoiding overlong lines in the make output.
# Replace `SRCDIR` with the output of running `$(PWD)` after changing into
# the directory; `override` makes this assignment win even when `SRCDIR` was
# given on the command line.
# NOTE(review): `$(PWD)` is defined outside this view -- presumably the
# `pwd` command, which would turn `SRCDIR` into an absolute path; confirm.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# Aggregate goals.  None of them names a file, so all of them -- including
# `all` -- are declared phony; otherwise a stray file called `all` in the
# working directory would silently turn `make all` into a no-op.
.PHONY: all pattern-trad pattern-refo pattern-swiss

# Build the patterns for all three orthographies.
all: pattern-trad pattern-refo pattern-swiss

# One goal per orthography; each stands for the corresponding output files.
pattern-trad: $(TRADFILES)
pattern-refo: $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# Convenience goals that only create the hyphenated word lists.
#
# `words-swiss` must refer to the list in `$(SWISSDIR)`: that is the only
# place for which this Makefile has a rule creating `words.hyphenated.swiss`.
# Pointing it at `$(REFODIR)` makes the goal fail with "No rule to make
# target".
.PHONY: words-trad words-refo words-swiss
words-trad: $(TRADDIR)/words.hyphenated.trad
words-refo: $(REFODIR)/words.hyphenated.refo
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
# The preparation goals are commands, not files.
.PHONY: \
  pre-trad \
  pre-refo \
  pre-swiss
# Attach the matching preparation goal to every generated file as an
# order-only prerequisite (after `|`): it is brought up to date first, but
# being phony it must not force the files to be rebuilt each time.
$(TRADFILES) \
$(TRADDIR)/words.hyphenated.trad: | pre-trad

$(REFOFILES) \
$(REFODIR)/words.hyphenated.refo: | pre-refo

$(SWISSFILES) \
$(SWISSDIR)/words.hyphenated.swiss: | pre-swiss
# `pattern.8` and `pattern.rules` come out of one and the same recipe run.
# Outside of pattern rules, listing several targets does not express "one
# invocation creates all of them", so both files depend on a per-directory
# sentinel file (`make-full-pattern-*`, time-stamped with `echo`) instead.
$(addprefix $(TRADDIR)/,pattern.8 pattern.rules): $(TRADDIR)/make-full-pattern-trad
$(addprefix $(REFODIR)/,pattern.8 pattern.rules): $(REFODIR)/make-full-pattern-refo
$(addprefix $(SWISSDIR)/,pattern.8 pattern.rules): $(SWISSDIR)/make-full-pattern-swiss
301 $(DATADIR
)/german-gesang.tr
: $(DATADIR
)/german.tr
306 # $(1): pattern directory
307 define make-full-pattern
308 $(CHDIR
) $(1) $(bsnl
)\
309 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
314 # Both `make-full-pattern.sh` and `german.tr` control hyphenation parameters;
315 # it is thus a good idea to make them prerequisites.
316 $(TRADDIR
)/make-full-pattern-trad \
317 $(REFODIR
)/make-full-pattern-refo \
318 $(SWISSDIR
)/make-full-pattern-swiss
: \
319 $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh \
# Sentinel files: each one is refreshed by running the canned recipe
# `make-full-pattern` in its pattern directory whenever the hyphenated word
# list of that directory is newer.

$(TRADDIR)/make-full-pattern-trad: \
  $(TRADDIR)/words.hyphenated.trad
	$(call make-full-pattern,$(TRADDIR))

$(REFODIR)/make-full-pattern-refo: \
  $(REFODIR)/words.hyphenated.refo
	$(call make-full-pattern,$(REFODIR))

$(SWISSDIR)/make-full-pattern-swiss: \
  $(SWISSDIR)/words.hyphenated.swiss
	$(call make-full-pattern,$(SWISSDIR))
329 # $(1): pattern file name
330 # $(2): pattern directory
332 $(CAT
) $(DATADIR
)/$(1).1 $(bsnl
)\
333 |
$(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
334 -e
"s/@GIT_VERSION@/$(GIT_VERSION)/" $(bsnl
)\
335 -e
"s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" $(bsnl
)\
336 -e
"s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@
$(bsnl
)\
337 && $(CAT
) $(2)/pattern.rules
>> $@
$(bsnl
)\
338 && $(CAT
) $(DATADIR
)/$(TRAD
).2 >> $@
$(bsnl
)\
339 && $(CAT
) $(2)/pattern
.8 >> $@
$(bsnl
)\
340 && $(CAT
) $(DATADIR
)/$(TRAD
).3 >> $@
# Dated `.pat` files.  Each is assembled by `make-pat-file` (arguments:
# pattern file name, pattern directory) once `pattern.8` and `pattern.rules`
# of the same directory are up to date.

$(TRADDIR)/$(TRAD)-$(DATE).pat: $(addprefix $(TRADDIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(TRAD),$(TRADDIR))

$(REFODIR)/$(REFO)-$(DATE).pat: $(addprefix $(REFODIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(REFO),$(REFODIR))

$(SWISSDIR)/$(SWISS)-$(DATE).pat: $(addprefix $(SWISSDIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(SWISS),$(SWISSDIR))
350 # $(1): arguments for `extract-tex.pl`
353 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
$(1) $(PERL_PATTYPE
) $(bsnl
)\
354 |
$(SED_PATTYPE
) $(bsnl
)\
359 $(TRADDIR
)/words.hyphenated.trad
: $(WORDLIST
)
360 $(call extract-tex
,-t
)
361 $(REFODIR
)/words.hyphenated.refo
: $(WORDLIST
)
363 $(SWISSDIR
)/words.hyphenated.swiss
: $(WORDLIST
)
364 $(call extract-tex
,-s
)
367 $(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
371 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
373 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
375 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
380 # patterns for handling round 's' vs. long 'ſ'
383 # Word lists and patterns for converting words with only round 's' to
384 # words with distinction of round and long 's' according to the German
# orthography variant "de-Latf" (for texts typeset using *Fraktur*).
387 # The patterns contain only round 's'. Hyphenation points after 's' indicate
388 # final 's' inside a word ('Aus-schuss' == 'Ausſchuſs').
# Output directory and helper scripts for the round-s / long-s patterns.
SCHLUSS_S_DIR       = $(OUTDIR)/schluss-s
S2LONG_S            = $(SCRIPTDIR)/spezialmuster/lang_s/s2long-s.py
SCHLUSS_S_QUASIHYPH = $(SCRIPTDIR)/spezialmuster/lang_s/final_s_quasihyph.py
# Short goal names for the three final-s pattern files (mixed, 1901 and
# 1996 orthography).
.PHONY: \
  schluss-s \
  schluss-s-1901 \
  schluss-s-1996

schluss-s:      $(SCHLUSS_S_DIR)/de_schluss-s.pat
schluss-s-1901: $(SCHLUSS_S_DIR)/de-1901_schluss-s.pat
schluss-s-1996: $(SCHLUSS_S_DIR)/de-1996_schluss-s.pat
# Unhyphenated word lists that distinguish long s from round s
# (Auſsage, …, Zynismus).  `$(S2LONG_S)` reads the word list on standard
# input; `-l` selects the orthography, and the script itself is a
# prerequisite so that changing it regenerates the lists.

exzerpte/de-Latf: \
  $(WORDLIST) \
  $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l 'de-1901,de-1996' < $< > $@

exzerpte/de-Latf-1901: \
  $(WORDLIST) \
  $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l de-1901 < $< > $@

exzerpte/de-Latf-1996: \
  $(WORDLIST) \
  $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l de-1996 < $< > $@
# Word lists with a hyphen following every final "s" inside a word
# (Aus-sage, …, Zynis-mus), each derived from the matching de-Latf excerpt.
#
# The output directory is created on demand: no rule visible in this file
# creates `$(SCHLUSS_S_DIR)`, and without it the redirection into `$@`
# fails.  `mkdir -p` does nothing if the directory already exists.

$(SCHLUSS_S_DIR)/de_schluss-s: exzerpte/de-Latf
	mkdir -p $(@D)
	$(SCHLUSS_S_QUASIHYPH) < $< > $@

$(SCHLUSS_S_DIR)/de-1901_schluss-s: exzerpte/de-Latf-1901
	mkdir -p $(@D)
	$(SCHLUSS_S_QUASIHYPH) < $< > $@

$(SCHLUSS_S_DIR)/de-1996_schluss-s: exzerpte/de-Latf-1996
	mkdir -p $(@D)
	$(SCHLUSS_S_QUASIHYPH) < $< > $@
# Canned recipe: patterns with breakpoints following final "s" inside a word.
#
# It runs `make-full-pattern.sh` inside `$(SCHLUSS_S_DIR)` on the file name
# of the first prerequisite (`$(<F)`) together with `german.tr`, and then
# copies the `pattern.8` file from that directory to the target.  The copy
# uses `>` rather than `>>`, so rebuilding an existing target replaces its
# contents instead of appending a second set of patterns to it.
define schluss-s-muster
$(CHDIR) $(SCHLUSS_S_DIR) $(bsnl)\
  && $(SH) $(SCRIPTDIR)/trennmuster/make-full-pattern.sh $(bsnl)\
       $(<F) $(DATADIR)/german.tr
$(CAT) $(SCHLUSS_S_DIR)/pattern.8 > $@
# Final-s pattern files, each generated from the quasi-hyphenated word list
# of the same name by the canned recipe `schluss-s-muster`.

$(SCHLUSS_S_DIR)/de_schluss-s.pat: \
  $(SCHLUSS_S_DIR)/de_schluss-s
	$(call schluss-s-muster)

$(SCHLUSS_S_DIR)/de-1901_schluss-s.pat: \
  $(SCHLUSS_S_DIR)/de-1901_schluss-s
	$(call schluss-s-muster)

$(SCHLUSS_S_DIR)/de-1996_schluss-s.pat: \
  $(SCHLUSS_S_DIR)/de-1996_schluss-s
	$(call schluss-s-muster)
434 # patterns for 'breaking up' typographic ligatures
437 # Word lists and patterns 'de_ligaturaufbruch': 'de-1901' (old orthography),
# 'de-1996' (reformed orthography), and mixed. The final patterns contain
439 # hyphenation points at positions where ligatures like 'fl' must not
440 # occur ('Dorfladen' => 'Dorf-laden').
# Output directory for the ligature patterns.  It has to be defined before
# the rules below: make expands targets and prerequisites while it reads a
# rule, so with a later definition `$(LIGA)` would still be empty there and
# the goals would ask for files such as `/de_ligaturaufbruch-<date>.pat`.
LIGA = $(OUTDIR)/ligaturaufbruch

# Short goal names for the three dated ligature pattern files.
.PHONY: de_ligaturaufbruch de-1901_ligaturaufbruch de-1996_ligaturaufbruch
de_ligaturaufbruch: $(LIGA)/de_ligaturaufbruch-$(DATE).pat
de-1901_ligaturaufbruch: $(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat
de-1996_ligaturaufbruch: $(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat
449 # $(1): arguments for `-l` parameter of `sprachauszug.py`
450 define ligaturaufbruch-eingabe
452 $(SPRACHAUSZUG
) -l
$(1) -s
"morphemgrenzen,einfach" $(bsnl
)\
# Input word lists for the ligature patterns.  The argument handed to
# `ligaturaufbruch-eingabe` is the value for the `-l` option of
# `sprachauszug.py`.

$(LIGA)/de_ligaturaufbruch.eingabe: \
  $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901:de-CH-1901:de-1996:de-CH-1996")

$(LIGA)/de-1901_ligaturaufbruch.eingabe: \
  $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901:de-CH-1901")

$(LIGA)/de-1996_ligaturaufbruch.eingabe: \
  $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1996:de-CH-1996")
# Canned recipe: ligature break-up patterns.
#
# It runs `make-full-pattern.sh` inside `$(LIGA)` on the file name of the
# first prerequisite (`$(<F)`) together with `german.tr`, and then copies
# the `pattern.8` file from that directory to the target.  The copy uses `>`
# rather than `>>`, so rebuilding an existing target replaces its contents
# instead of appending a second set of patterns to it.
define make-full-liga-pattern
$(CHDIR) $(LIGA) $(bsnl)\
  && $(SH) $(SCRIPTDIR)/trennmuster/make-full-pattern.sh $(bsnl)\
       $(<F) $(DATADIR)/german.tr
$(CAT) $(LIGA)/pattern.8 > $@
# Dated ligature pattern files, each generated from the `.eingabe` word list
# of the same name by the canned recipe `make-full-liga-pattern`.

$(LIGA)/de_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1901_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat: \
  $(LIGA)/de-1996_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)
# Excerpts created with `sprachauszug.py`
#
# Every excerpt is produced by `$(SPRACHAUSZUG)` reading `$(WORDLIST)` on
# standard input.  Both are listed as prerequisites so that an excerpt is
# regenerated when the word list or the script changes, in the same way the
# `exzerpte/de-Latf*` rules declare their inputs.  Without prerequisites an
# existing excerpt would always be considered up to date.

exzerpte/de-1996_morphemgrenzen: $(WORDLIST) $(SPRACHAUSZUG)
	$(SPRACHAUSZUG) -l de-1996 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1901_morphemgrenzen: $(WORDLIST) $(SPRACHAUSZUG)
	$(SPRACHAUSZUG) -l de-1901 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1996_hyphenmin3: $(WORDLIST) $(SPRACHAUSZUG)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "standard,morphemisch,hyphenmin3,einfach" < $< > $@

exzerpte/de-1996_gesangstext: $(WORDLIST) $(SPRACHAUSZUG)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "syllabisch,gesangstext,einfach" < $< > $@