1 # -*- coding: utf-8 -*-
3 # (See below for a German introduction.)
5 # This Makefile creates German hyphenation patterns in subdirectories
6 # `$(TRAD)` and `$(REFO)` for traditional and new orthography, respectively.
7 # Hyphenation patterns for traditional Swiss German are generated in
8 # directory `$(SWISS)`.
10 # The input data is expected to be in `$(SRCDIR)`, which by default is set to
11 # the directory containing the Makefile. Output goes to directory
12 # `$(OUTDIR)`, which by default is set to './muster'.
14 # The possible targets are `pattern-trad`, `pattern-refo`, and
15 # `pattern-swiss`. If no target (or target `all`) is given, all patterns for
16 # all three targets are built.
21 # mkdir build-patterns
23 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
26 # If you add one of the (phony) targets `major`, `fugen`, or `suffix`,
27 # patterns that only use major hyphenation points ('Haupttrennstellen') are
28 # created. If you add the (phony) target `gesang`, patterns usable for
29 # German lyrics are created. Example:
32 # make major pattern-refo
35 # The output directories have `-major` (etc.) appended to their names. Note
36 # that the `major`, `fugen`, and `suffix` targets reflect the markup in the
37 # `wortliste` file; they are not intended to produce patterns for 'good'
38 # hyphenation in text but rather to test the consistency of the markup, and
39 # to assist in adding new words.
41 # To control the used weights in the major hyphenation patterns, add variable
42 # `W=N`, where `N` gives the quality: value 1 specifies the best hyphenation
43 # points only, value 2 both the best and second-best points, etc. The
44 # default is value 0, using all major hyphenation points.
46 # The targets `schluss-s`, `schluss-s-1901`, and `schluss-s-1996` create
47 # (experimental) patterns and word lists for converting words in traditional
48 # and new orthography that make a distinction between long and round S as
49 # needed for typesetting with Fraktur fonts. An example for applying these
50 # patterns is the script `skripte/lib/py_patuse/long_s_conversion.py`.
52 # The targets `de_ligaturaufbruch`, `de-1901_ligaturaufbruch`, and
53 # `de-1996_ligaturaufbruch` create (experimental) patterns and word lists for
54 # 'breaking up' ligatures: The "hyphenation" points indicate positions where
55 # ligatures like 'fl' must not occur.
58 # Dieses Makefile erzeugt deutsche Trennmuster in den Unterverzeichnissen
59 # `$(TRAD)` und `$(REFO)` für die traditionelle bzw. reformierte
# Rechtschreibung. Trennmuster für traditionelles deutschschweizerisches
# Deutsch werden im Verzeichnis `$(SWISS)` erzeugt.
63 # Die Eingabedaten werden im Verzeichnis `$(SRCDIR)` erwartet, welches
# standardmäßig identisch mit dem Verzeichnis ist, welches die
65 # `Makefile`-Datei enthält. Die Ausgabe wird in Verzeichnis `$(OUTDIR)`
66 # erzeugt, welches standardmäßig den Namen './muster' hat.
68 # Die möglichen Make-Ziele sind `pattern-trad`, `pattern-refo` und
69 # `pattern-swiss`. Wenn kein Ziel angegeben ist (oder man das Ziel `all`
70 # verwendet), werden alle drei Trennmuster erzeugt.
75 # mkdir build-patterns
77 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
80 # Wird eines der zusätzlichen (künstlichen) Ziele `major`, `fugen` oder
81 # `suffix` angegeben, werden Haupttrennstellmuster erzeugt. Wird das
82 # (künstliche) Ziel `gesang` angegeben, werden Muster mit
83 # Gesangstrennstellen erzeugt.
88 # make major pattern-refo
91 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings mit
92 # einem angehängten `-major`, `-fugen`, `-suffix` bzw. `-gesang`.
94 # Die Haupttrennstellmuster spiegeln die Auszeichnung in der Liste direkt
95 # wider. Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen,
96 # sondern sind zum Testen der Konsistenz der Auszeichnung sowie zum
97 # "kategorisierten" Markieren der Trennstellen neuer Wörter gedacht.
99 # Bei `major` kann die Menge der verwendeten Haupttrennstellen mittels der
# Variable `W=N` (Wichtungs-Schwellwert) kontrolliert werden, wobei `N` die
101 # Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen, Wert 2
102 # die besten und zweitbesten Haupttrennstellen usw. Der Standardwert für `W`
103 # ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden sollen.
105 # Die Ziele `schluss-s`, `schluss-s-1901` und `schluss-s-1996` erzeugen
106 # (experimentelle) Wortlisten und Muster für die Wandlung von
107 # Wörtern in die Orthographievariante mit Unterscheidung von langem und
# rundem S (Binnen-S vs. Schluß-S), wie sie im Satz mit gebrochenen Schriften
109 # benötigt wird (de-Latf). Ein Beispiel für die Anwendung dieser Muster
110 # ist das Skript `skripte/lib/py_patuse/long_s_conversion.py`.
# Die Ziele `de_ligaturaufbruch`, `de-1901_ligaturaufbruch` und
113 # `de-1996_ligaturaufbruch` erzeugen (experimentelle) Wortlisten und
114 # Trennmuster, um Ligaturen 'aufzubrechen': Die Trennstellen zeigen an,
115 # wo Ligaturen wie 'fl' nicht auftreten dürfen.
# Layout of the source tree.  `SRCDIR` is derived from the path of the last
# makefile in `$(MAKEFILE_LIST)`.  All four variables are recursively
# expanded, so the derived paths track any later change of `SRCDIR`.
SRCDIR    = $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATADIR   = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
WORDLIST  = $(SRCDIR)/wortliste
131 .PHONY
: major fugen
suffix gesang
134 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
136 # A single `-` gets removed; all other combinations of `-`, `<`, `>`, and
137 # `=` are converted to a hyphen.
138 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
141 -e
's/[=<>][=<>]*/-/g'
142 PERL_PATTYPE
= -g
$(W
) -1 -U
144 ifeq ($(words $(MAKECMDGOALS
)),1)
147 # This is to suppress the 'nothing to be done' warning.
151 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
153 # All combinations of `-`, `<`, `>`, `<=`, `=>` get removed, runs of `=`
154 # are converted to a hyphen.
155 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
159 -e
's/[<>][<>]*//g' \
161 PERL_PATTYPE
= -g
$(W
) -1 -U
163 ifeq ($(words $(MAKECMDGOALS
)),1)
166 # This is to suppress the 'nothing to be done' warning.
170 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
172 # All combinations of `-`, `<`, `=` get removed, runs of `>` are converted
174 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
176 -e
's/[<=][<=]*//g' \
178 PERL_PATTYPE
= -g
$(W
) -1 -U
180 ifeq ($(words $(MAKECMDGOALS
)),1)
183 # This is to suppress the 'nothing to be done' warning.
187 else ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
192 ifeq ($(words $(MAKECMDGOALS
)),1)
195 # This is to suppress the 'nothing to be done' warning.
# Base names of the three pattern sets; the value of `$(PATTYPE)` is
# appended directly to each name.
TRAD  = dehypht-x$(PATTYPE)
REFO  = dehyphn-x$(PATTYPE)
SWISS = dehyphts-x$(PATTYPE)
209 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Date stamp (YYYY-MM-DD) used in output file names and substituted for
# `@DATE@`.  Simple expansion (`:=`) runs `date` exactly once, so every
# reference within a single `make` invocation sees the same value.
DATE := $(shell date '+%Y-%m-%d')
# Helper scripts located below `$(SCRIPTDIR)`.
SPRACHAUSZUG = $(SCRIPTDIR)/wortliste/sprachauszug.py
S2LONG_S     = $(SCRIPTDIR)/spezialmuster/lang_s/s2long-s.py
# Filter for word lists: `sort -d` piped into `uniq -i`, both run with the
# environment settings from `$(LC_ENVVARS)`.
SORT = $(LC_ENVVARS) sort -d $(bsnl)\
       | $(LC_ENVVARS) uniq -i
228 ifneq ($(findstring gesang
,$(MAKECMDGOALS
)),)
229 GERMAN_TR
= $(DATADIR
)/german-gesang.tr
233 GERMAN_TR
= $(DATADIR
)/german.tr
235 $(strip $(shell $(SED
) 's/^\(..\).*/\1/;q' < $(GERMAN_TR
)))
237 $(strip $(shell $(SED
) 's/^..\(..\).*/\1/;q' < $(GERMAN_TR
)))
# This stores a backquoted shell snippet, not its output: the command only
# runs when a recipe containing `$(GIT_VERSION)` is passed to the shell.
# NOTE(review): presumably `$(CHDIR)` is `cd` and `$(GIT)` is `git`, in
# which case this yields the full hash of HEAD in the source tree -- confirm.
GIT_VERSION := `$(CHDIR) $(SRCDIR) \
                && $(GIT) log --format=%H -1 HEAD --`
# Output directory of each pattern set, below `$(OUTDIR)`.
TRADDIR  = $(OUTDIR)/$(TRAD)
REFODIR  = $(OUTDIR)/$(REFO)
SWISSDIR = $(OUTDIR)/$(SWISS)

# Final products of each pattern set: the dated `.pat` and `.tex` files.
TRADFILES  = $(TRADDIR)/$(TRAD)-$(DATE).pat $(TRADDIR)/$(TRAD)-$(DATE).tex
REFOFILES  = $(REFODIR)/$(REFO)-$(DATE).pat $(REFODIR)/$(REFO)-$(DATE).tex
SWISSFILES = $(SWISSDIR)/$(SWISS)-$(DATE).pat $(SWISSDIR)/$(SWISS)-$(DATE).tex
250 # This macro defines a backslash followed by a newline. We use it to
251 # beautify canned recipes, avoiding overlong lines in the make output.
# Replace `SRCDIR` with the output of running `$(PWD)` inside that
# directory.  `override` makes this assignment take precedence even over a
# value given on the command line.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# `all` builds the patterns for all three orthographies.  It is declared
# phony together with the `pattern-*` goals so that a stray file named
# `all` cannot make the goal appear up to date.
.PHONY: all pattern-trad pattern-refo pattern-swiss
all: pattern-trad pattern-refo pattern-swiss

pattern-trad: $(TRADFILES)
pattern-refo: $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# Convenience goals that build only the hyphenated word lists.
.PHONY: words-trad words-refo words-swiss
words-trad: $(TRADDIR)/words.hyphenated.trad
words-refo: $(REFODIR)/words.hyphenated.refo
# The Swiss word list is built in `$(SWISSDIR)`; only that path has a rule.
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
281 .PHONY
: pre-trad pre-refo pre-swiss
# The `pre-*` goals are order-only prerequisites (after `|`): they are
# handled before the outputs, but do not by themselves make an output
# out of date.
$(TRADFILES) $(TRADDIR)/words.hyphenated.trad: | pre-trad
$(REFOFILES) $(REFODIR)/words.hyphenated.refo: | pre-refo
$(SWISSFILES) $(SWISSDIR)/words.hyphenated.swiss: | pre-swiss
293 # GNU make supports creation of multiple targets by a single invocation of a
294 # recipe only for pattern rules, thus we have to use a 'sentinel file' (using
295 # 'echo' for the time stamp).
# In each output directory, `pattern.8` and `pattern.rules` depend on that
# directory's `make-full-pattern-*` file.
$(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules: $(TRADDIR)/make-full-pattern-trad
$(REFODIR)/pattern.8 $(REFODIR)/pattern.rules: $(REFODIR)/make-full-pattern-refo
$(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules: $(SWISSDIR)/make-full-pattern-swiss
302 $(DATADIR
)/german-gesang.tr
: $(DATADIR
)/german.tr
307 # $(1): pattern directory
308 define make-full-pattern
309 $(CHDIR
) $(1) $(bsnl
)\
310 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
315 # Both `make-full-pattern.sh` and `german.tr` control hyphenation parameters;
316 # it is thus a good idea to make them prerequisites.
317 $(TRADDIR
)/make-full-pattern-trad \
318 $(REFODIR
)/make-full-pattern-refo \
319 $(SWISSDIR
)/make-full-pattern-swiss
: \
320 $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh \
# Each `make-full-pattern-*` file is rebuilt from its directory's hyphenated
# word list by expanding the `make-full-pattern` macro with that directory.
$(TRADDIR)/make-full-pattern-trad: $(TRADDIR)/words.hyphenated.trad
	$(call make-full-pattern,$(TRADDIR))

$(REFODIR)/make-full-pattern-refo: $(REFODIR)/words.hyphenated.refo
	$(call make-full-pattern,$(REFODIR))

$(SWISSDIR)/make-full-pattern-swiss: $(SWISSDIR)/words.hyphenated.swiss
	$(call make-full-pattern,$(SWISSDIR))
330 # $(1): pattern file name
331 # $(2): pattern directory
333 $(CAT
) $(DATADIR
)/$(1).1 $(bsnl
)\
334 |
$(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
335 -e
"s/@GIT_VERSION@/$(GIT_VERSION)/" $(bsnl
)\
336 -e
"s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" $(bsnl
)\
337 -e
"s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@
$(bsnl
)\
338 && $(CAT
) $(2)/pattern.rules
>> $@
$(bsnl
)\
339 && $(CAT
) $(DATADIR
)/$(TRAD
).2 >> $@
$(bsnl
)\
340 && $(CAT
) $(2)/pattern
.8 >> $@
$(bsnl
)\
341 && $(CAT
) $(DATADIR
)/$(TRAD
).3 >> $@
# Dated pattern files: each one is produced by the `make-pat-file` macro,
# called with the pattern base name and the pattern directory, once
# `pattern.8` and `pattern.rules` of that directory are available.
$(TRADDIR)/$(TRAD)-$(DATE).pat: $(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules
	$(call make-pat-file,$(TRAD),$(TRADDIR))

$(REFODIR)/$(REFO)-$(DATE).pat: $(REFODIR)/pattern.8 $(REFODIR)/pattern.rules
	$(call make-pat-file,$(REFO),$(REFODIR))

$(SWISSDIR)/$(SWISS)-$(DATE).pat: $(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules
	$(call make-pat-file,$(SWISS),$(SWISSDIR))
351 # $(1): arguments for `extract-tex.pl`
354 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
$(1) $(PERL_PATTYPE
) $(bsnl
)\
355 |
$(SED_PATTYPE
) $(bsnl
)\
360 $(TRADDIR
)/words.hyphenated.trad
: $(WORDLIST
)
361 $(call extract-tex
,-t
)
362 $(REFODIR
)/words.hyphenated.refo
: $(WORDLIST
)
364 $(SWISSDIR
)/words.hyphenated.swiss
: $(WORDLIST
)
365 $(call extract-tex
,-s
)
368 $(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
372 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
374 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
376 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
381 # patterns for handling round 's' vs. long 'ſ'
384 # Word lists and patterns for converting words with only round 's' to
385 # words with distinction of round and long 's' according to the German
# orthography variant "de-Latf" (for texts typeset using *Fraktur*).
388 # The patterns contain only round 's'. Hyphenation points after 's' indicate
389 # final 's' inside a word ('Aus-schuss' == 'Ausſchuſs').
# Output directory and helper script for the final-s patterns.
SCHLUSS_S_DIR       = $(OUTDIR)/schluss-s
SCHLUSS_S_QUASIHYPH = $(SCRIPTDIR)/spezialmuster/lang_s/final_s_quasihyph.py

# User-facing goals; each one maps to a pattern file in `$(SCHLUSS_S_DIR)`.
.PHONY: schluss-s schluss-s-1901 schluss-s-1996
schluss-s: $(SCHLUSS_S_DIR)/de_schluss-s.pat
schluss-s-1901: $(SCHLUSS_S_DIR)/de-1901_schluss-s.pat
schluss-s-1996: $(SCHLUSS_S_DIR)/de-1996_schluss-s.pat
399 # unhyphenated words with distinction of long-s and round-s
400 # (Auſsage, …, Zynismus)
# The word list (`$<`) is fed to the conversion script on stdin and the
# result is written to the target.  The script itself is a prerequisite,
# so changing it triggers a rebuild as well.
exzerpte/de-Latf: $(WORDLIST) $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l 'de-1901,de-1996' < $< > $@

exzerpte/de-Latf-1901: $(WORDLIST) $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l de-1901 < $< > $@

exzerpte/de-Latf-1996: $(WORDLIST) $(S2LONG_S)
	$(S2LONG_S) --drop-homonyms -l de-1996 < $< > $@
408 # words with hyphens following final "s" inside a word
409 # (Aus-sage, …, Zynis-mus)
# Each de-Latf excerpt is piped through `$(SCHLUSS_S_QUASIHYPH)` to produce
# the corresponding input list in `$(SCHLUSS_S_DIR)`.
$(SCHLUSS_S_DIR)/de_schluss-s: exzerpte/de-Latf
	$(SCHLUSS_S_QUASIHYPH) < $< > $@

$(SCHLUSS_S_DIR)/de-1901_schluss-s: exzerpte/de-Latf-1901
	$(SCHLUSS_S_QUASIHYPH) < $< > $@

$(SCHLUSS_S_DIR)/de-1996_schluss-s: exzerpte/de-Latf-1996
	$(SCHLUSS_S_QUASIHYPH) < $< > $@
417 # patterns with breakpoints following final "s" inside a word
418 define schluss-s-muster
419 $(CHDIR
) $(SCHLUSS_S_DIR
) $(bsnl
)\
420 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
421 $(<F
) $(DATADIR
)/german.tr
422 $(CAT
) $(SCHLUSS_S_DIR
)/pattern
.8 >> $@
# Pattern files for the final-s lists; every rule expands the same
# `schluss-s-muster` macro, which works on the rule's own `$<` and `$@`.
$(SCHLUSS_S_DIR)/de_schluss-s.pat: $(SCHLUSS_S_DIR)/de_schluss-s
	$(call schluss-s-muster)

$(SCHLUSS_S_DIR)/de-1901_schluss-s.pat: $(SCHLUSS_S_DIR)/de-1901_schluss-s
	$(call schluss-s-muster)

$(SCHLUSS_S_DIR)/de-1996_schluss-s.pat: $(SCHLUSS_S_DIR)/de-1996_schluss-s
	$(call schluss-s-muster)
434 # patterns for 'breaking up' typographic ligatures
437 # Word lists and patterns 'de_ligaturaufbruch': 'de-1901' (old orthography),
# 'de-1996' (reformed orthography), and mixed. The final patterns contain
439 # hyphenation points at positions where ligatures like 'fl' must not
440 # occur ('Dorfladen' => 'Dorf-laden').
# `LIGA` must be assigned before the rules that follow: make expands target
# and prerequisite lists while it reads a rule, so a definition placed after
# them would leave `$(LIGA)` empty in those prerequisite lists.
LIGA = $(OUTDIR)/ligaturaufbruch

# User-facing goals; each one maps to a dated pattern file in `$(LIGA)`.
.PHONY: de_ligaturaufbruch de-1901_ligaturaufbruch de-1996_ligaturaufbruch
de_ligaturaufbruch: $(LIGA)/de_ligaturaufbruch-$(DATE).pat
de-1901_ligaturaufbruch: $(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat
de-1996_ligaturaufbruch: $(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat
449 # $(1): arguments for `-l` parameter of `sprachauszug.py`
450 define ligaturaufbruch-eingabe
452 $(SPRACHAUSZUG
) -l
$(1) -s
"morphemgrenzen,einfach" $(bsnl
)\
# Input lists for the ligature patterns.  The argument of the macro is the
# language selection passed on to `sprachauszug.py -l`.
$(LIGA)/de_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901:de-CH-1901:de-1996:de-CH-1996")

$(LIGA)/de-1901_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901:de-CH-1901")

$(LIGA)/de-1996_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1996:de-CH-1996")
463 define make-full-liga-pattern
464 $(CHDIR
) $(LIGA
) $(bsnl
)\
465 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
466 $(<F
) $(DATADIR
)/german.tr
467 $(CAT
) $(LIGA
)/pattern
.8 >> $@
# Dated ligature pattern files; every rule expands the same
# `make-full-liga-pattern` macro, which works on the rule's own `$<` and `$@`.
$(LIGA)/de_ligaturaufbruch-$(DATE).pat: $(LIGA)/de_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat: $(LIGA)/de-1901_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat: $(LIGA)/de-1996_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)
483 # Exzerpte mit `sprachauszug.py`
# Listed as a prerequisite of every excerpt below, so that a change to the
# filter module triggers a rebuild.
STILFILTER = $(SCRIPTDIR)/lib/py_wortliste/stilfilter.py

# Each excerpt reads the word list (`$<`) on stdin; `-l` and `-s` are passed
# to `$(SPRACHAUSZUG)` unchanged.
exzerpte/de-1996_morphemgrenzen: $(WORDLIST) $(STILFILTER)
	$(SPRACHAUSZUG) -l de-1996 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1901_morphemgrenzen: $(WORDLIST) $(STILFILTER)
	$(SPRACHAUSZUG) -l de-1901 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1996_hyphenmin3: $(WORDLIST) $(STILFILTER)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "standard,morphemisch,hyphenmin3,einfach" < $< > $@

exzerpte/de-1996_gesangstext-syllabisch: $(WORDLIST) $(STILFILTER)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "syllabisch,gesangstext,einfach" < $< > $@

exzerpte/de-1996_gesangstext-morphemisch: $(WORDLIST) $(STILFILTER)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "morphemisch,gesangstext,einfach" < $< > $@
# Word list in the orthography used for Fraktur typesetting (distinction
# between round 's' and long 'ſ').
exzerpte/wortliste-Latf: $(WORDLIST) $(S2LONG_S)
	$(S2LONG_S) -w < $< > $@