1 # -*- coding: utf-8 -*-
3 # (See below for a German introduction.)
5 # This Makefile creates German hyphenation patterns in subdirectories
6 # `$(TRAD)` and `$(REFO)` for traditional and new orthography, respectively.
7 # Hyphenation patterns for traditional Swiss German are generated in
8 # directory `$(SWISS)`.
10 # The input data is expected to be in `$(SRCDIR)`, which by default is set
11 # to the directory containing the Makefile. Output goes to directory
12 # `$(OUTDIR)`, which by default is set to './muster'.
14 # The possible targets are `pattern-trad`, `pattern-refo`, and
15 # `pattern-swiss`. If no target (or target `all`) is given, all patterns
16 # for all three targets are built.
21 # mkdir build-patterns
23 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
26 # If you add one of the (phony) targets `major`, `fugen`, or `suffix`,
27 # patterns that only use major hyphenation points ('Haupttrennstellen')
28 # are created. Example:
31 # make major pattern-refo
34 # The output directories have `-major` (etc.) appended to their names. Note
35 # that these special patterns reflect the markup in the `wortliste` file;
36 # they are not intended to produce patterns for 'good' hyphenation in text
37 # but rather to test the consistency of the markup, and to assist in adding
40 # To control the used weights in the major hyphenation patterns, add
41 # variable `W=N`, where `N` gives the quality: value 1 specifies the best
42 # hyphenation points only, value 2 both the best and second-best points,
43 # etc. The default is value 0, using all major hyphenation points.
45 # The targets `de-Latf`, `de-Latf-1901`, and `de-Latf-1996` create
46 # (experimental) patterns and word lists for converting words in traditional
47 # and new orthography that make a distinction between long and round S as
48 # needed for typesetting with Fraktur fonts. An example for applying these
49 # patterns is the script
50 # `skripte/python/patuse/long_s_conversion.py`.
53 # Dieses Makefile erzeugt deutsche Trennmuster in den Unterverzeichnissen
54 # `$(TRAD)` und `$(REFO)` für die traditionelle bzw. reformierte
# Rechtschreibung. Trennmuster für traditionelles deutschschweizerisches
# Deutsch werden im Verzeichnis `$(SWISS)` erzeugt.
58 # Die Eingabedaten werden im Verzeichnis `$(SRCDIR)` erwartet, welches
# standardmäßig identisch zu dem Verzeichnis ist, welches die
60 # `Makefile`-Datei enthält. Die Ausgabe wird in Verzeichnis `$(OUTDIR)`
61 # erzeugt, welches standardmäßig den Namen './muster' hat.
63 # Die möglichen Make-Ziele sind `pattern-trad`, `pattern-refo` und
64 # `pattern-swiss`. Wenn kein Ziel angegeben ist (oder man das Ziel `all`
65 # verwendet), werden alle drei Trennmuster erzeugt.
70 # mkdir build-patterns
72 # make --makefile=~/git/wortliste/Makefile OUTDIR=. pattern-trad
75 # Wird eines der zusätzlichen (künstlichen) Ziele `major`, `fugen` oder
76 # `suffix` angegeben, werden Haupttrennstellmuster erzeugt.
81 # make major pattern-refo
84 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings
85 # mit einem angehängten `-major`, `-fugen` bzw. `-suffix`.
87 # Diese Spezialmuster spiegeln die Auszeichnung in der Liste direkt wider.
88 # Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen, sondern
89 # sind zum Testen der Konsistenz der Auszeichnung sowie zum "kategorisierten"
90 # Markieren der Trennstellen neuer Wörter gedacht.
92 # Bei `major` kann die Menge der verwendeten Haupttrennstellen mittels der
# Variable `W=N` (Wichtungs-Schwellwert) kontrolliert werden, wobei `N` die
94 # Qualität angibt: Wert 1 selektiert nur die besten Haupttrennstellen,
95 # Wert 2 die besten und zweitbesten Haupttrennstellen usw. Der Standardwert
96 # für `W` ist 0; er gibt an, dass alle Haupttrennstellen verwendet werden
99 # Die Ziele `de-Latf`, `de-Latf-1901` und `de-Latf-1996` erzeugen
100 # (experimentelle) Wortlisten und (Quasi-) Trennmuster für die Wandlung von
101 # Wörtern in traditioneller oder reformierter Standardorthographie in der
# Variante mit Unterscheidung von langem und rundem S (Binnen-S vs.
103 # Schluß-S), wie sie im Satz mit gebrochenen Schriften benötigt wird. Ein
104 # Beispiel für die Anwendung dieser Muster ist das Skript
105 # `skripte/python/patuse/long_s_conversion.py`.
# Layout of the source tree.  Every variable is recursively expanded
# (`=`), so each one follows whatever value `SRCDIR` has when it is used.
#
#   SRCDIR     directory of the makefile that was read last; `realpath`
#              resolves symlinks, `dir` keeps the trailing slash
#   DATADIR    `daten` below SRCDIR
#   SCRIPTDIR  `skripte` below SRCDIR
#   LANGSDIR   `spezialmuster/lang_s` below SCRIPTDIR
#   WORDLIST   the `wortliste` file in SRCDIR
SRCDIR    = $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
DATADIR   = $(SRCDIR)/daten
SCRIPTDIR = $(SRCDIR)/skripte
LANGSDIR  = $(SCRIPTDIR)/spezialmuster/lang_s
WORDLIST  = $(SRCDIR)/wortliste
# Pseudo goals that select a special pattern type; they never name files.
.PHONY: major
.PHONY: fugen
.PHONY: suffix
119 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
121 # A single `-' gets removed; all other combinations of `-', `<', `>',
122 # and `=' are converted to a hyphen.
123 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
126 -e
's/[=<>][=<>]*/-/g'
127 PERL_PATTYPE
= -g
$(W
)
129 ifeq ($(words $(MAKECMDGOALS
)),1)
132 # This is to suppress the `nothing to be done' warning.
136 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
138 # All combinations of `-', `<', `>', `<=', `=>' get removed,
139 # runs of `=' are converted to a hyphen.
140 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
144 -e
's/[<>][<>]*//g' \
146 PERL_PATTYPE
= -g
$(W
)
148 ifeq ($(words $(MAKECMDGOALS
)),1)
151 # This is to suppress the `nothing to be done' warning.
155 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
157 # All combinations of `-', `<', `=' get removed,
158 # runs of `>' are converted to a hyphen.
159 SED_PATTYPE
= $(SED
) -e
'/[=<>-]/!n' \
161 -e
's/[<=][<=]*//g' \
163 PERL_PATTYPE
= -g
$(W
)
165 ifeq ($(words $(MAKECMDGOALS
)),1)
168 # This is to suppress the `nothing to be done' warning.
# Base names of the three pattern sets (traditional, reformed, and
# traditional Swiss orthography).  `PATTYPE` is appended verbatim; it is
# assigned outside this block -- per the file header it yields suffixes
# such as `-major` for the special pattern types.
TRAD  = dehypht-x$(PATTYPE)
REFO  = dehyphn-x$(PATTYPE)
SWISS = dehyphts-x$(PATTYPE)
# Directory below $(OUTDIR) that receives the `ligaturaufbruch` files.
LIGA = $(OUTDIR)/ligaturaufbruch
184 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date in ISO format (YYYY-MM-DD); it becomes part of the names of
# the generated pattern files.
#
# Simply expanded (`:=`) on purpose: `date` runs exactly once while the
# makefile is parsed.  With a recursive `=` the command would be re-run
# on every expansion, and a build crossing midnight could see target
# names (expanded at parse time) disagree with the date used in recipes.
# A `DATE=...` given on the command line still takes precedence.
DATE := $(shell date '+%Y-%m-%d')
# Command prefix that runs `sprachauszug.py` (from the `wortliste`
# script directory) with the configured Python interpreter.
SPRACHAUSZUG = $(PYTHON) $(SCRIPTDIR)/wortliste/sprachauszug.py
# Pipeline stage: `sort -d` followed by `uniq -i`, both run with the
# locale settings from $(LC_ENVVARS) in their environment.
SORT = $(LC_ENVVARS) sort -d | $(LC_ENVVARS) uniq -i
# Hyphenation minima, read from the first line of `german.tr`:
# characters 1-2 give the left value, characters 3-4 the right value
# (`q` stops sed after that line); `strip` removes padding blanks.
# Both stay recursively expanded, so the file is only read when one of
# the values is actually used.
LEFTHYPHENMIN  = $(strip $(shell $(SED) 's/^\(..\).*/\1/;q' < $(DATADIR)/german.tr))
RIGHTHYPHENMIN = $(strip $(shell $(SED) 's/^..\(..\).*/\1/;q' < $(DATADIR)/german.tr))
# Hash of the current HEAD commit of the source tree.  The backquotes are
# stored literally: make only substitutes the tool and directory names
# here, and the shell performs the command substitution wherever the
# variable ends up inside a recipe.
GIT_VERSION := `$(CHDIR) $(SRCDIR) && $(GIT) log --format=%H -1 HEAD --`
# Output directory of each pattern set, located below $(OUTDIR).
TRADDIR  = $(OUTDIR)/$(TRAD)
REFODIR  = $(OUTDIR)/$(REFO)
SWISSDIR = $(OUTDIR)/$(SWISS)

# The two dated end products of each pattern set:
# `<name>-<date>.pat` and `<name>-<date>.tex`, in that order.
TRADFILES  = $(foreach ext,pat tex,$(TRADDIR)/$(TRAD)-$(DATE).$(ext))
REFOFILES  = $(foreach ext,pat tex,$(REFODIR)/$(REFO)-$(DATE).$(ext))
SWISSFILES = $(foreach ext,pat tex,$(SWISSDIR)/$(SWISS)-$(DATE).$(ext))
216 # This macro defines a backslash followed by a newline. We use it to
217 # beautify canned recipes, avoiding overlong lines in the make output.
223 # A comma cannot be part of an argument of `call`; we have to use a variable
# Normalise SRCDIR once, at parse time: change into the directory and
# store what $(PWD) prints there (presumably `pwd`, i.e. an absolute
# path -- its definition is outside this block).  `override` makes the
# result win even over a SRCDIR given on the command line.
override SRCDIR := $(shell cd $(SRCDIR) && $(PWD))
# `all` builds the patterns for all three orthographies.
all: pattern-trad pattern-refo pattern-swiss

# None of these goals names a file.  `all` is declared phony as well, so
# that a stray file or directory called `all` cannot make the goal look
# up to date.
.PHONY: all pattern-trad pattern-refo pattern-swiss

# Each goal stands for the dated .pat/.tex pair of one pattern set.
pattern-trad:  $(TRADFILES)
pattern-refo:  $(REFOFILES)
pattern-swiss: $(SWISSFILES)
238 # intermediate targets
# Convenience goals that stop after the hyphenated word list of one
# pattern set has been extracted.
.PHONY: words-trad words-refo words-swiss
words-trad:  $(TRADDIR)/words.hyphenated.trad
words-refo:  $(REFODIR)/words.hyphenated.refo
# The Swiss word list is generated in $(SWISSDIR); the only rule for
# `words.hyphenated.swiss` creates it there, so the goal has to point at
# that directory and not at $(REFODIR).
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
# The preparation goals are commands, not files.
.PHONY: pre-trad
.PHONY: pre-refo
.PHONY: pre-swiss
# Put the preparation goal of each orthography in front of everything
# generated for it: the dated pattern/TeX files and the hyphenated word
# list.
# NOTE(review): if the `pre-*` goals are phony, GNU make regards all
# targets listed here as out of date on every run.  Confirm whether that
# is wanted (e.g. to pick up a changed `W=N`) before turning these into
# order-only prerequisites.
$(TRADFILES) \
$(TRADDIR)/words.hyphenated.trad: pre-trad

$(REFOFILES) \
$(REFODIR)/words.hyphenated.refo: pre-refo

$(SWISSFILES) \
$(SWISSDIR)/words.hyphenated.swiss: pre-swiss
260 # GNU make supports creation of multiple targets by a single
261 # invocation of a recipe only for pattern rules, thus we have
262 # to use a `sentinel file' (using `echo' for the time stamp).
# `pattern.8` and `pattern.rules` have no recipe of their own; both are
# tied to the `make-full-pattern-*` sentinel file of their directory.
$(addprefix $(TRADDIR)/,pattern.8 pattern.rules): $(TRADDIR)/make-full-pattern-trad

$(addprefix $(REFODIR)/,pattern.8 pattern.rules): $(REFODIR)/make-full-pattern-refo

$(addprefix $(SWISSDIR)/,pattern.8 pattern.rules): $(SWISSDIR)/make-full-pattern-swiss
269 # $(1): pattern directory
270 define make-full-pattern
271 $(CHDIR
) $(1) $(bsnl
)\
272 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
273 $(<F
) $(DATADIR
)/german.tr
# Sentinel targets: each one depends on the hyphenated word list of its
# pattern set and runs the canned recipe `make-full-pattern`, passing the
# pattern directory as $(1).

$(TRADDIR)/make-full-pattern-trad: $(TRADDIR)/words.hyphenated.trad
	$(call make-full-pattern,$(TRADDIR))

$(REFODIR)/make-full-pattern-refo: $(REFODIR)/words.hyphenated.refo
	$(call make-full-pattern,$(REFODIR))

$(SWISSDIR)/make-full-pattern-swiss: $(SWISSDIR)/words.hyphenated.swiss
	$(call make-full-pattern,$(SWISSDIR))
284 # $(1): pattern file name
285 # $(2): pattern directory
287 $(CAT
) $(DATADIR
)/$(1).1 $(bsnl
)\
288 |
$(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
289 -e
"s/@GIT_VERSION@/$(GIT_VERSION)/" $(bsnl
)\
290 -e
"s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" $(bsnl
)\
291 -e
"s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" > $@
$(bsnl
)\
292 && $(CAT
) $(2)/pattern.rules
>> $@
$(bsnl
)\
293 && $(CAT
) $(DATADIR
)/$(TRAD
).2 >> $@
$(bsnl
)\
294 && $(CAT
) $(2)/pattern
.8 >> $@
$(bsnl
)\
295 && $(CAT
) $(DATADIR
)/$(TRAD
).3 >> $@
# Dated `.pat` file of each pattern set.  It is assembled by the canned
# recipe `make-pat-file` ($(1): pattern file name, $(2): pattern
# directory) once `pattern.8` and `pattern.rules` are available.

$(TRADDIR)/$(TRAD)-$(DATE).pat: $(addprefix $(TRADDIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(TRAD),$(TRADDIR))

$(REFODIR)/$(REFO)-$(DATE).pat: $(addprefix $(REFODIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(REFO),$(REFODIR))

$(SWISSDIR)/$(SWISS)-$(DATE).pat: $(addprefix $(SWISSDIR)/,pattern.8 pattern.rules)
	$(call make-pat-file,$(SWISS),$(SWISSDIR))
305 # $(1): arguments for `extract-tex.pl`
308 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
$(1) $(PERL_PATTYPE
) $(bsnl
)\
309 |
$(SED_PATTYPE
) $(bsnl
)\
# Hyphenated word lists, all derived from $(WORDLIST).  The argument of
# `extract-tex` is handed on to `extract-tex.pl`; the three lists differ
# only in the leading option (`-t`, none, `-s`).

$(TRADDIR)/words.hyphenated.trad: $(WORDLIST)
	$(call extract-tex,-t -1 -U)

$(REFODIR)/words.hyphenated.refo: $(WORDLIST)
	$(call extract-tex,-1 -U)

$(SWISSDIR)/words.hyphenated.swiss: $(WORDLIST)
	$(call extract-tex,-s -1 -U)
321 $(SED
) -e
"s/@DATE@/$(DATE)/" $(bsnl
)\
325 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
327 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
329 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
332 # Listen und Patterns de-Latf (deutsch, Latin script, fraktur;
333 # Orthographie für Satz mit gebrochenen Schriften und rundem und langem S)
# Goals for the long-s (Fraktur) patterns: mixed, 1901 and 1996
# orthography.  Each one stands for a `.pat` file in $(LATF).
.PHONY: de-Latf
.PHONY: de-Latf-1901
.PHONY: de-Latf-1996

de-Latf:      $(LATF)/de-Latf.pat
de-Latf-1901: $(LATF)/de-Latf-1901.pat
de-Latf-1996: $(LATF)/de-Latf-1996.pat
340 # Wortlisten mit Langem-S: gemischt, de-1901 (alt), de-1996 (reform)
342 # $(1): arguments for `-l` parameter of `s2long-s.py`
345 $(PYTHON
) $(LANGSDIR
)/s2long-s.py
--drop-homonyms
-l
$(1) $(bsnl
)\
# Long-s word lists.  They are rebuilt when either $(WORDLIST) or the
# conversion script changes.  The argument of `s2long-s` is the value for
# the script's `-l` option; `$(,)` inserts a literal comma, which cannot
# be written directly inside a `call` argument.

$(LATF)/words-de-Latf.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901$(,)de-1996")

$(LATF)/words-de-Latf-1901.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1901")

$(LATF)/words-de-Latf-1996.txt: $(WORDLIST) $(LANGSDIR)/s2long-s.py
	$(call s2long-s,"de-1996")
356 # de-Latf...: Quasi-Trennstellen nach rund-s (aus-sagen == ausſagen)
359 $(PYTHON
) $(LANGSDIR
)/de_Latf_quasihyph.py
$(bsnl
)\
363 $(LATF
)/words-de-Latf.hyphenated
: $(LATF
)/words-de-Latf.txt
365 $(LATF
)/words-de-Latf-1901.hyphenated
: $(LATF
)/words-de-Latf-1901.txt
367 $(LATF
)/words-de-Latf-1996.hyphenated
: $(LATF
)/words-de-Latf-1996.txt
370 define make-full-latf-pattern
371 $(CHDIR
) $(LATF
) $(bsnl
)\
372 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
373 $(<F
) $(DATADIR
)/de-Latf.tr
374 $(CAT
) $(LATF
)/pattern
.8 >> $@
# Long-s pattern files: each is produced from its quasi-hyphenated word
# list by the canned recipe `make-full-latf-pattern`.

$(LATF)/de-Latf.pat: $(LATF)/words-de-Latf.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1901.pat: $(LATF)/words-de-Latf-1901.hyphenated
	$(call make-full-latf-pattern)

$(LATF)/de-Latf-1996.pat: $(LATF)/words-de-Latf-1996.hyphenated
	$(call make-full-latf-pattern)
385 # Ligaturaufbruchmuster
# Goals for the ligature-breaking patterns; each one stands for a dated
# `.pat` file in $(LIGA).
.PHONY: de_ligaturaufbruch
.PHONY: de-1901_ligaturaufbruch
.PHONY: de-1996_ligaturaufbruch

de_ligaturaufbruch:      $(LIGA)/de_ligaturaufbruch-$(DATE).pat
de-1901_ligaturaufbruch: $(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat
de-1996_ligaturaufbruch: $(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat
392 # $(1): arguments for `-l` parameter of `sprachauszug.py`
393 define ligaturaufbruch-eingabe
395 $(SPRACHAUSZUG
) -l
$(1) -s
"morphemgrenzen,einfach" $(bsnl
)\
# Input word lists for the ligature-breaking patterns, extracted from
# $(WORDLIST).  The argument of `ligaturaufbruch-eingabe` is the language
# list for the `-l` option of `sprachauszug.py`; `$(,)` stands for a
# literal comma inside the `call` argument.

$(LIGA)/de_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901$(,)de-1996$(,)de-CH-1996")

$(LIGA)/de-1901_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1901$(,)de-CH-1901")

$(LIGA)/de-1996_ligaturaufbruch.eingabe: $(WORDLIST)
	$(call ligaturaufbruch-eingabe,"de-1996$(,)de-CH-1996")
406 define make-full-liga-pattern
407 $(CHDIR
) $(LIGA
) $(bsnl
)\
408 && $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(bsnl
)\
409 $(<F
) $(DATADIR
)/de-Latf.tr
410 $(CAT
) $(LIGA
)/pattern
.8 >> $@
# Dated ligature-breaking pattern files: each is produced from its
# `.eingabe` word list by the canned recipe `make-full-liga-pattern`.

$(LIGA)/de_ligaturaufbruch-$(DATE).pat: $(LIGA)/de_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1901_ligaturaufbruch-$(DATE).pat: $(LIGA)/de-1901_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)

$(LIGA)/de-1996_ligaturaufbruch-$(DATE).pat: $(LIGA)/de-1996_ligaturaufbruch.eingabe
	$(call make-full-liga-pattern)
421 # Exzerpte mit `sprachauszug.py`
# Excerpts of the word list, written by `sprachauszug.py` into the
# `exzerpte` directory (relative to the directory make runs in).
#
# Every excerpt names $(WORDLIST) as its prerequisite, so it is refreshed
# when the word list changes; without a prerequisite an existing excerpt
# would never be rebuilt.  `$<` is that word list.  `mkdir -p $(@D)`
# makes sure the output directory exists before the redirection opens
# the target; it is harmless when the directory is already there.

exzerpte/de-1996_morphemgrenzen: $(WORDLIST)
	mkdir -p $(@D)
	$(SPRACHAUSZUG) -l de-1996 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1901_morphemgrenzen: $(WORDLIST)
	mkdir -p $(@D)
	$(SPRACHAUSZUG) -l de-1901 \
	  -s "morphemgrenzen,einfach" < $< > $@

exzerpte/de-1996_hyphenmin3: $(WORDLIST)
	mkdir -p $(@D)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "standard,morphemisch,hyphenmin3,einfach" < $< > $@

exzerpte/de-1996_gesangstext: $(WORDLIST)
	mkdir -p $(@D)
	$(SPRACHAUSZUG) -l "de-1996,de-1996-x-versal" \
	  -s "syllabisch,gesangstext,einfach" < $< > $@