1 # -*- coding: utf-8 -*-
3 # This Makefile creates German hyphenation patterns in subdirectories
4 # $(TRAD) and $(REFO) for traditional and new orthography, respectively.
5 # Hyphenation patterns for traditional Swiss German are generated in $(SWISS).
8 # The input data is in $(SRCDIR); the possible targets are `pattern-trad',
9 # `pattern-refo', and `pattern-swiss'. If no target (or target `all') is
10 # given, all patterns for all three targets are built.
12 # SRCDIR (and the other variables) can be easily modified as parameters
13 # while calling `make', e.g.
15 # make pattern-trad SRCDIR=~/git/wortliste
17 # If you add one of the (phony) targets `major', `fugen', or `suffix',
18 # patterns that only use major hyphenation points (`Haupttrennstellen')
19 # are created. Example:
21 # make major pattern-refo
23 # The output directories have `-major' (etc.) appended to their names.
25 # To control the used weights in the major hyphenation patterns, add
26 # variable `W=N', where `N' gives the quality: value 1 specifies the best
27 # hyphenation points only, value 2 both the best and second-best points,
28 # etc. The default is value 0, using all major hyphenation points.
32 # Dieses Makefile erzeugt deutsche Trennmuster in den
33 # Unterverzeichnissen $(TRAD) und $(REFO) für die traditionelle
34 # bzw. reformierte Rechtschreibung. Trennmuster für traditionelles
35 # deutschschweizerisches Deutsch werden im Verzeichnis $(SWISS) erzeugt.
37 # Die Eingabedaten werden im Verzeichnis $(SRCDIR) erwartet; die möglichen
38 # Make-Ziele sind `pattern-trad', `pattern-refo' und `pattern-swiss'. Wenn
39 # kein Ziel angegeben ist (oder man das Ziel `all' verwendet), werden alle
40 # drei Trennmuster erzeugt.
42 # SRCDIR (und die anderen Variablen) kann man leicht beim Aufruf von
43 # `make' als Parameter modifizieren, z.B.
45 # make pattern-trad SRCDIR=~/git/wortliste
47 # Wird eines der zusätzlichen (künstlichen) Ziele `major', `fugen' oder
48 # `suffix' angegeben, werden Haupttrennstellmuster erzeugt.
52 # make major pattern-refo
54 # Die verwendeten Verzeichnisnamen sind die gleichen wie oben, allerdings
55 # mit einem angehängten `-major', `-fugen' bzw. `-suffix'.
57 # Diese Spezialmuster spiegeln die Auszeichnung in der Liste direkt wider.
58 # Sie haben nicht das Ziel, "gute" Trennungen in Texten zu erzeugen, sondern
59 # sind zum Testen der Konsistenz der Auszeichnung sowie zum "kategorisierten"
60 # Markieren der Trennstellen neuer Wörter gedacht.
62 # Bei `major' kann die Menge der verwendeten Haupttrennstellen mittels der
63 # Variable `W=N' (Wichtungs-Schwellwert)
64 # kontrolliert werden, wo `N' die Qualität angibt: Wert 1 selektiert
65 # nur die besten Haupttrennstellen, Wert 2 die besten und zweitbesten
66 # Haupttrennstellen usw. Der Standardwert für `W' ist 0; er gibt an, dass
67 # alle Haupttrennstellen verwendet werden sollen.
69 # Das Ziel `de-Latf' erzeugt (experimentelle) Wortlisten
70 # und (Quasi-)Trennmuster für die Wandlung von Wörtern in traditioneller oder
71 # reformierter Standardorthographie in die Variante mit Unterscheidung von
72 # langem und rundem S (Binnen-S vs. Schluß-S), wie sie im Satz mit gebrochenen
73 # Schriften benötigt wird. Ein Beispiel für die Anwendung dieser Muster ist
74 # das Skript `skripte/python/patuse/long_s_conversion.py`.
# Locations inside the source tree, all derived from SRCDIR.  The
# assignments are recursive (`=') so that the value SRCDIR has at the time
# of use is the one that counts.
DATADIR    = ${SRCDIR}/daten
EXZERPTDIR = ${SRCDIR}/exzerpte
PATTDIR    = ${SRCDIR}/muster
SCRIPTDIR  = ${SRCDIR}/skripte
LANGSDIR   = ${SCRIPTDIR}/spezialmuster/lang_s
# Goal-dependent setup: depending on whether `major', `fugen', or `suffix'
# occurs in $(MAKECMDGOALS), SEDMAJOR is set to a sed command that rewrites
# the hyphenation markers `-', `<', `>', and `='.  The first sed expression
# ('/[=<>-]/!n') is applied to lines that contain none of these markers.
# The nested `ifeq ($(words $(MAKECMDGOALS)),1)' tests check whether the
# special goal is the only goal given on the command line.
# NOTE(review): several lines of this conditional (further `-e' expressions,
# the bodies of the nested tests, and the closing `endif') are not visible
# in this excerpt -- confirm against the complete file.
86 ifneq ($(findstring major
,$(MAKECMDGOALS
)),)
88 # A single `-' gets removed; all other combinations of `-', `<', `>',
89 # and `=' are converted to a hyphen.
90 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
93 -e
's/[=<>][=<>]*/-/g'
96 ifeq ($(words $(MAKECMDGOALS
)),1)
99 # This is to suppress the `nothing to be done' warning.
103 else ifneq ($(findstring fugen
,$(MAKECMDGOALS
)),)
105 # All combinations of `-', `<', `>', `<=', `=>' get removed,
106 # runs of `=' are converted to a hyphen.
107 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
111 -e
's/[<>][<>]*//g' \
115 ifeq ($(words $(MAKECMDGOALS
)),1)
118 # This is to suppress the `nothing to be done' warning.
122 else ifneq ($(findstring suffix,$(MAKECMDGOALS
)),)
124 # All combinations of `-', `<', `=' get removed,
125 # runs of `>' are converted to a hyphen.
126 SEDMAJOR
= $(SED
) -e
'/[=<>-]/!n' \
128 -e
's/[<=][<=]*//g' \
132 ifeq ($(words $(MAKECMDGOALS
)),1)
135 # This is to suppress the `nothing to be done' warning.
# Base names of the three pattern sets.  $(MAJOR) is appended to each name
# (presumably `-major', `-fugen', or `-suffix' when one of the special goals
# is requested, empty otherwise -- its definition is not visible here).
TRAD  = dehypht-x${MAJOR}
REFO  = dehyphn-x${MAJOR}
SWISS = dehyphts-x${MAJOR}

# Directory used by the ligature break-up (`ligaturaufbruch') rules.
LIGA = ${PATTDIR}/ligaturaufbruch
# Environment assignments that are put in front of the `sort' and `uniq'
# calls in SORT; the visible part sets LC_COLLATE to de_DE.UTF-8.
# NOTE(review): the value continues after the trailing backslash; the
# continuation line is not visible in this excerpt.
151 LC_ENVVARS
= LC_COLLATE
=de_DE.UTF-8 \
# Build date in ISO format (YYYY-MM-DD); it becomes part of the output file
# names and replaces @DATE@ in the templates.
# Simply expanded (`:=') on purpose: `date' is run once while the Makefile
# is read instead of once per expansion, so all file names and rules within
# a single `make' run agree even if the run crosses midnight.  The value can
# still be overridden on the command line (`make DATE=...').
DATE := $(shell date '+%Y-%m-%d')
# Command that runs the language excerpt script `sprachauszug.py'.
# The script is addressed via $(SCRIPTDIR) (= $(SRCDIR)/skripte), like all
# other helper scripts in this file, so that it is also found when `make'
# is not started from within the source directory.
SPRACHAUSZUG = $(PYTHON) $(SCRIPTDIR)/trennmuster/sprachauszug.py
# Filter for use in pipelines: `sort -d' followed by `uniq -i', both run
# with the environment settings from LC_ENVVARS.
SORT = ${LC_ENVVARS} sort -d | ${LC_ENVVARS} uniq -i
# Hyphenation minima, read from the first line of $(DATADIR)/german.tr:
# characters 1-2 give LEFTHYPHENMIN, characters 3-4 give RIGHTHYPHENMIN
# (sed prints the rewritten first line and quits; `strip' removes blanks).
LEFTHYPHENMIN  = $(strip $(shell ${SED} 's/^\(..\).*/\1/;q' < ${DATADIR}/german.tr))
RIGHTHYPHENMIN = $(strip $(shell ${SED} 's/^..\(..\).*/\1/;q' < ${DATADIR}/german.tr))
# Shell command substitution (backticks) that yields the full commit hash of
# HEAD in $(SRCDIR).  It is not run by make itself; the shell evaluates it
# wherever $(GIT_VERSION) is inserted into a recipe.
# `&&' instead of `;': if changing into $(SRCDIR) fails, `git log' must not
# report the hash of whatever repository the current directory belongs to.
GIT_VERSION := `$(CHDIR) $(SRCDIR) && $(GIT) log --format=%H -1 HEAD --`
# Output directories (relative paths below `muster') and, per orthography,
# the two files that make up a pattern set: NAME-DATE.pat and NAME-DATE.tex.
TRADDIR  = muster/${TRAD}
REFODIR  = muster/${REFO}
SWISSDIR = muster/${SWISS}

TRADFILES  = $(addprefix ${TRADDIR}/${TRAD}-${DATE}.,pat tex)
REFOFILES  = $(addprefix ${REFODIR}/${REFO}-${DATE}.,pat tex)
SWISSFILES = $(addprefix ${SWISSDIR}/${SWISS}-${DATE}.,pat tex)
# Replace SRCDIR -- even if it was given on the command line, hence
# `override' -- by the output of running $(PWD) after changing into it.
# NOTE(review): presumably PWD is defined as `pwd' elsewhere in this file,
# which would make SRCDIR an absolute path; confirm.
override SRCDIR := $(shell cd ${SRCDIR}; ${PWD})
# Default goal: build the pattern sets for all three orthographies.
all: pattern-trad pattern-refo pattern-swiss

# None of these names is a file.  `all' is included so that a stray file
# called `all' in the working directory cannot make the default goal appear
# up to date.
.PHONY: all pattern-trad pattern-refo pattern-swiss major fugen suffix
pattern-trad: $(TRADFILES)
pattern-refo: $(REFOFILES)
pattern-swiss: $(SWISSFILES)
# Intermediate targets: build only the hyphenated word list of one
# orthography.  `words-swiss' complements `words-trad' and `words-refo';
# the file it refers to has a rule of its own further down in this file.
.PHONY: words-trad words-refo words-swiss
words-trad: $(TRADDIR)/words.hyphenated.trad
words-refo: $(REFODIR)/words.hyphenated.refo
words-swiss: $(SWISSDIR)/words.hyphenated.swiss
.PHONY: pre-trad pre-refo pre-swiss

# The preparation goals must have run before any output file is written.
# They are phony, so they are listed as order-only prerequisites (after
# `|'): they are still executed first, but they no longer force the word
# lists and pattern files to be rebuilt on every invocation of `make'.
$(TRADFILES) $(TRADDIR)/words.hyphenated.trad: | pre-trad
$(REFOFILES) $(REFODIR)/words.hyphenated.refo: | pre-refo
$(SWISSFILES) $(SWISSDIR)/words.hyphenated.swiss: | pre-swiss
216 # GNU make supports creation of multiple targets by a single
217 # invocation of a recipe only for pattern rules, thus we have
218 # to use a `sentinel file' (using `echo' for the time stamp).
# Traditional orthography: pattern.8 and pattern.rules both depend on the
# sentinel file make-full-pattern-trad.  The sentinel's recipe changes into
# $(TRADDIR) and runs make-full-pattern.sh on the file part of its first
# prerequisite ($(<F), i.e. words.hyphenated.trad) together with
# $(DATADIR)/german.tr.
# NOTE(review): the recipe line that writes the sentinel file itself is not
# visible in this excerpt -- confirm against the complete file.
221 $(TRADDIR
)/pattern
.8 $(TRADDIR
)/pattern.rules
: $(TRADDIR
)/make-full-pattern-trad
223 $(TRADDIR
)/make-full-pattern-trad
: $(TRADDIR
)/words.hyphenated.trad
224 $(CHDIR
) $(TRADDIR
); \
225 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for traditional orthography from five
# parts, in this order:
#   $(DATADIR)/$(TRAD).1   with @DATE@, @GIT_VERSION@, @LEFTHYPHENMIN@ and
#                          @RIGHTHYPHENMIN@ replaced by the current values
#   $(TRADDIR)/pattern.rules
#   $(DATADIR)/$(TRAD).2
#   $(TRADDIR)/pattern.8
#   $(DATADIR)/$(TRAD).3
# The steps are joined with `&&' (not `;') and sed reads its input file
# directly, so that a missing or unreadable part makes the recipe fail
# instead of silently leaving an incomplete pattern file behind.
$(TRADDIR)/$(TRAD)-$(DATE).pat: $(TRADDIR)/pattern.8 $(TRADDIR)/pattern.rules
	$(SED) -e "s/@DATE@/$(DATE)/" \
	       -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	       -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	       -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" \
	       $(DATADIR)/$(TRAD).1 > $@ \
	&& $(CAT) $(TRADDIR)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(TRAD).2 >> $@ \
	&& $(CAT) $(TRADDIR)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(TRAD).3 >> $@
# Reformed orthography: pattern.8 and pattern.rules both depend on the
# sentinel file make-full-pattern-refo.  The sentinel's recipe changes into
# $(REFODIR) and runs make-full-pattern.sh on the file part of its first
# prerequisite ($(<F), i.e. words.hyphenated.refo) together with
# $(DATADIR)/german.tr.
# NOTE(review): the recipe line that writes the sentinel file itself is not
# visible in this excerpt -- confirm against the complete file.
240 $(REFODIR
)/pattern
.8 $(REFODIR
)/pattern.rules
: $(REFODIR
)/make-full-pattern-refo
242 $(REFODIR
)/make-full-pattern-refo
: $(REFODIR
)/words.hyphenated.refo
243 $(CHDIR
) $(REFODIR
); \
244 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for reformed orthography from five parts,
# in this order:
#   $(DATADIR)/$(REFO).1   with @DATE@, @GIT_VERSION@, @LEFTHYPHENMIN@ and
#                          @RIGHTHYPHENMIN@ replaced by the current values
#   $(REFODIR)/pattern.rules
#   $(DATADIR)/$(REFO).2
#   $(REFODIR)/pattern.8
#   $(DATADIR)/$(REFO).3
# The steps are joined with `&&' (not `;') and sed reads its input file
# directly, so that a missing or unreadable part makes the recipe fail
# instead of silently leaving an incomplete pattern file behind.
$(REFODIR)/$(REFO)-$(DATE).pat: $(REFODIR)/pattern.8 $(REFODIR)/pattern.rules
	$(SED) -e "s/@DATE@/$(DATE)/" \
	       -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	       -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	       -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" \
	       $(DATADIR)/$(REFO).1 > $@ \
	&& $(CAT) $(REFODIR)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(REFO).2 >> $@ \
	&& $(CAT) $(REFODIR)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(REFO).3 >> $@
# Swiss German: pattern.8 and pattern.rules both depend on the sentinel
# file make-full-pattern-swiss.  The sentinel's recipe changes into
# $(SWISSDIR) and runs make-full-pattern.sh on the file part of its first
# prerequisite ($(<F), i.e. words.hyphenated.swiss) together with
# $(DATADIR)/german.tr.
# NOTE(review): the recipe line that writes the sentinel file itself is not
# visible in this excerpt -- confirm against the complete file.
259 $(SWISSDIR
)/pattern
.8 $(SWISSDIR
)/pattern.rules
: $(SWISSDIR
)/make-full-pattern-swiss
261 $(SWISSDIR
)/make-full-pattern-swiss
: $(SWISSDIR
)/words.hyphenated.swiss
262 $(CHDIR
) $(SWISSDIR
); \
263 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/german.tr
# Assemble the final pattern file for Swiss German from five parts, in this
# order:
#   $(DATADIR)/$(SWISS).1  with @DATE@, @GIT_VERSION@, @LEFTHYPHENMIN@ and
#                          @RIGHTHYPHENMIN@ replaced by the current values
#   $(SWISSDIR)/pattern.rules
#   $(DATADIR)/$(SWISS).2
#   $(SWISSDIR)/pattern.8
#   $(DATADIR)/$(SWISS).3
# The steps are joined with `&&' (not `;') and sed reads its input file
# directly, so that a missing or unreadable part makes the recipe fail
# instead of silently leaving an incomplete pattern file behind.
$(SWISSDIR)/$(SWISS)-$(DATE).pat: $(SWISSDIR)/pattern.8 $(SWISSDIR)/pattern.rules
	$(SED) -e "s/@DATE@/$(DATE)/" \
	       -e "s/@GIT_VERSION@/$(GIT_VERSION)/" \
	       -e "s/@LEFTHYPHENMIN@/$(LEFTHYPHENMIN)/" \
	       -e "s/@RIGHTHYPHENMIN@/$(RIGHTHYPHENMIN)/" \
	       $(DATADIR)/$(SWISS).1 > $@ \
	&& $(CAT) $(SWISSDIR)/pattern.rules >> $@ \
	&& $(CAT) $(DATADIR)/$(SWISS).2 >> $@ \
	&& $(CAT) $(SWISSDIR)/pattern.8 >> $@ \
	&& $(CAT) $(DATADIR)/$(SWISS).3 >> $@
# Hyphenated word list for traditional orthography; it depends on
# $(SRCDIR)/$(WORDLIST).  The visible part of the recipe pipes data into
# extract-tex.pl, called with `-t -1 -U' plus $(PERLMAJOR).
# NOTE(review): the command feeding the pipe and the continuation after the
# trailing backslash are not visible in this excerpt.
278 $(TRADDIR
)/words.hyphenated.trad
: $(SRCDIR
)/$(WORDLIST
)
280 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
-t
-1 -U
$(PERLMAJOR
) \
# Hyphenated word list for reformed orthography; it depends on
# $(SRCDIR)/$(WORDLIST).  The visible part of the recipe pipes data into
# extract-tex.pl, called with `-1 -U' plus $(PERLMAJOR).
# NOTE(review): the command feeding the pipe and the continuation after the
# trailing backslash are not visible in this excerpt.
284 $(REFODIR
)/words.hyphenated.refo
: $(SRCDIR
)/$(WORDLIST
)
286 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
-1 -U
$(PERLMAJOR
) \
# Hyphenated word list for Swiss German; it depends on
# $(SRCDIR)/$(WORDLIST).  The visible part of the recipe pipes data into
# extract-tex.pl, called with `-s -1 -U' plus $(PERLMAJOR).
# NOTE(review): the command feeding the pipe and the continuation after the
# trailing backslash are not visible in this excerpt.
290 $(SWISSDIR
)/words.hyphenated.swiss
: $(SRCDIR
)/$(WORDLIST
)
292 |
$(PERL
) $(SCRIPTDIR
)/wortliste
/extract-tex.pl
-s
-1 -U
$(PERLMAJOR
) \
# The dated .tex file for traditional orthography is generated from the
# template $(DATADIR)/$(TRAD).tex.in; sed replaces @DATE@ with $(DATE) and
# writes the result to the target.
# NOTE(review): the command feeding the pipe is not visible in this excerpt.
297 $(TRADDIR
)/$(TRAD
)-$(DATE
).
tex: $(DATADIR
)/$(TRAD
).
tex.in
299 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
# The dated .tex file for reformed orthography is generated from the
# template $(DATADIR)/$(REFO).tex.in; sed replaces @DATE@ with $(DATE) and
# writes the result to the target.
# NOTE(review): the command feeding the pipe is not visible in this excerpt.
301 $(REFODIR
)/$(REFO
)-$(DATE
).
tex: $(DATADIR
)/$(REFO
).
tex.in
303 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
# The dated .tex file for Swiss German is generated from the template
# $(DATADIR)/$(SWISS).tex.in; sed replaces @DATE@ with $(DATE) and writes
# the result to the target.
# NOTE(review): the command feeding the pipe is not visible in this excerpt.
305 $(SWISSDIR
)/$(SWISS
)-$(DATE
).
tex: $(DATADIR
)/$(SWISS
).
tex.in
307 |
$(SED
) -e
"s/@DATE@/$(DATE)/" > $@
309 # Listen und Patterns de-Latf (deutsch, Latin script, fraktur;
310 # Orthographie für Satz mit gebrochenen Schriften und rundem und langem S)
312 # Wortlisten mit Langem-S: gemischt, de-1901 (alt), de-1996 (reform)
# Long-s word list covering both orthographies: s2long-s.py is run with
# `--drop-homonyms -l "de-1901,de-1996"', reading $(WORDLIST) and writing
# $(LATF)/words-de-Latf.txt.  The script itself is a prerequisite, so the
# list is regenerated when the script changes.
# NOTE(review): the prerequisite is the literal name `wortliste' while the
# recipe reads $(WORDLIST) -- presumably the same file; a line between the
# rule header and the command may be missing from this excerpt.
314 $(LATF
)/words-de-Latf.txt
: wortliste
$(LANGSDIR
)/s2long-s.py
316 $(PYTHON
) $(LANGSDIR
)/s2long-s.py
--drop-homonyms \
317 -l
"de-1901,de-1996" < $(WORDLIST
) > $(LATF
)/words-de-Latf.txt
# Long-s word list for traditional orthography only: s2long-s.py is run
# with `--drop-homonyms -l de-1901', reading $(WORDLIST) and writing
# $(LATF)/words-de-1901-Latf.txt.  The script itself is a prerequisite.
# NOTE(review): the prerequisite is the literal name `wortliste' while the
# recipe reads $(WORDLIST) -- presumably the same file; a line between the
# rule header and the command may be missing from this excerpt.
319 $(LATF
)/words-de-1901-Latf.txt
: wortliste
$(LANGSDIR
)/s2long-s.py
321 $(PYTHON
) $(LANGSDIR
)/s2long-s.py
--drop-homonyms
-l de-1901 \
322 < $(WORDLIST
) > $(LATF
)/words-de-1901-Latf.txt
# Long-s word list for reformed orthography only (`-l de-1996'), produced by
# s2long-s.py from $(WORDLIST); rebuilt when `wortliste' or the script
# changes.
${LATF}/words-de-1996-Latf.txt: wortliste ${LANGSDIR}/s2long-s.py
	${PYTHON} ${LANGSDIR}/s2long-s.py --drop-homonyms -l de-1996 \
	  < ${WORDLIST} > $@
328 # de-Latf...: Quasi-Trennstellen nach rund-s (aus-sagen == ausſagen)
.PHONY: de-Latf de-1901-Latf de-1996-Latf

# Goal for the combined long-s patterns (both orthographies).
de-Latf: ${LATF}/de-Latf.pat

# de_Latf_quasihyph.py reads the long-s word list on standard input and
# writes the corresponding `.hyphenated' file to standard output.
${LATF}/words-de-Latf.hyphenated: ${LATF}/words-de-Latf.txt
	${PYTHON} ${LANGSDIR}/de_Latf_quasihyph.py < $< > $@
# Combined long-s pattern file: make-full-pattern.sh is run on the file part
# of the prerequisite ($(<F), i.e. words-de-Latf.hyphenated) together with
# $(DATADIR)/de-Latf.tr; afterwards $(LATF)/pattern.8 is appended to the
# target.
# NOTE(review): a recipe line between the rule header and the script call
# appears to be missing from this excerpt (the script is given a bare file
# name, so presumably the directory is changed first) -- confirm.
337 $(LATF
)/de-Latf.pat
: $(LATF
)/words-de-Latf.hyphenated
339 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
340 $(CAT
) $(LATF
)/pattern
.8 >> $@
;
# Goal for the long-s patterns of traditional orthography.
de-1901-Latf: ${LATF}/de-1901-Latf.pat

# de_Latf_quasihyph.py reads the long-s word list on standard input and
# writes the corresponding `.hyphenated' file to standard output.
${LATF}/words-de-1901-Latf.hyphenated: ${LATF}/words-de-1901-Latf.txt
	${PYTHON} ${LANGSDIR}/de_Latf_quasihyph.py < $< > $@
# Long-s pattern file for traditional orthography: make-full-pattern.sh is
# run on the file part of the prerequisite ($(<F), i.e.
# words-de-1901-Latf.hyphenated) together with $(DATADIR)/de-Latf.tr;
# afterwards $(LATF)/pattern.8 is appended to the target.
# NOTE(review): a recipe line between the rule header and the script call
# appears to be missing from this excerpt (the script is given a bare file
# name, so presumably the directory is changed first) -- confirm.
347 $(LATF
)/de-1901-Latf.pat
: $(LATF
)/words-de-1901-Latf.hyphenated
349 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
350 $(CAT
) $(LATF
)/pattern
.8 >> $@
;
# Goal for the long-s patterns of reformed orthography.
de-1996-Latf: ${LATF}/de-1996-Latf.pat

# de_Latf_quasihyph.py reads the long-s word list on standard input and
# writes the corresponding `.hyphenated' file to standard output.
${LATF}/words-de-1996-Latf.hyphenated: ${LATF}/words-de-1996-Latf.txt
	${PYTHON} ${LANGSDIR}/de_Latf_quasihyph.py < $< > $@
# Long-s pattern file for reformed orthography: make-full-pattern.sh is run
# on the file part of the prerequisite ($(<F), i.e.
# words-de-1996-Latf.hyphenated) together with $(DATADIR)/de-Latf.tr;
# afterwards $(LATF)/pattern.8 is appended to the target.
# NOTE(review): a recipe line between the rule header and the script call
# appears to be missing from this excerpt (the script is given a bare file
# name, so presumably the directory is changed first) -- confirm.
357 $(LATF
)/de-1996-Latf.pat
: $(LATF
)/words-de-1996-Latf.hyphenated
359 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
360 $(CAT
) $(LATF
)/pattern
.8 >> $@
;
363 # Ligaturaufbruchmuster
# Ligature break-up patterns for all four language variants.
#  * The three `*_ligaturaufbruch' goals are declared phony.
#  * `de_ligaturaufbruch' builds the dated pattern file in $(LIGA).
#  * The `.eingabe' file depends on $(SRCDIR)/$(WORDLIST) and is produced by
#    $(SPRACHAUSZUG) with languages de-1901, de-CH-1901, de-1996, de-CH-1996
#    and style `morphemgrenzen,einfach'.
#  * The pattern file is produced by make-full-pattern.sh from the file part
#    of the `.eingabe' prerequisite and $(DATADIR)/de-Latf.tr; afterwards
#    $(LIGA)/pattern.8 is appended to the target.
# NOTE(review): several recipe lines (input/output redirection of the
# excerpt command, the step before the script call) are not visible in this
# excerpt -- confirm against the complete file.
365 .PHONY
: de_ligaturaufbruch de-1901_ligaturaufbruch de-1996_ligaturaufbruch
367 de_ligaturaufbruch
: $(LIGA
)/de_ligaturaufbruch-
$(DATE
).pat
369 $(LIGA
)/de_ligaturaufbruch.eingabe
: $(SRCDIR
)/$(WORDLIST
)
371 $(SPRACHAUSZUG
) -l
'de-1901,de-CH-1901,de-1996,de-CH-1996' -s
'morphemgrenzen,einfach' \
374 $(LIGA
)/de_ligaturaufbruch-
$(DATE
).pat
: $(LIGA
)/de_ligaturaufbruch.eingabe
376 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
377 $(CAT
) $(LIGA
)/pattern
.8 >> $@
;
# Ligature break-up patterns for traditional orthography.
#  * `de-1901_ligaturaufbruch' builds the dated pattern file in $(LIGA).
#  * The `.eingabe' file depends on $(SRCDIR)/$(WORDLIST) and is produced by
#    $(SPRACHAUSZUG) with languages de-1901, de-CH-1901 and style
#    `morphemgrenzen,einfach'.
#  * The pattern file is produced by make-full-pattern.sh from the file part
#    of the `.eingabe' prerequisite and $(DATADIR)/de-Latf.tr; afterwards
#    $(LIGA)/pattern.8 is appended to the target.
# NOTE(review): several recipe lines (input/output redirection of the
# excerpt command, the step before the script call) are not visible in this
# excerpt -- confirm against the complete file.
379 de-1901_ligaturaufbruch
: $(LIGA
)/de-1901_ligaturaufbruch-
$(DATE
).pat
381 $(LIGA
)/de-1901_ligaturaufbruch.eingabe
: $(SRCDIR
)/$(WORDLIST
)
383 $(SPRACHAUSZUG
) -l
'de-1901,de-CH-1901' -s
'morphemgrenzen,einfach' \
386 $(LIGA
)/de-1901_ligaturaufbruch-
$(DATE
).pat
: $(LIGA
)/de-1901_ligaturaufbruch.eingabe
388 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
389 $(CAT
) $(LIGA
)/pattern
.8 >> $@
;
# Ligature break-up patterns for reformed orthography.
#  * `de-1996_ligaturaufbruch' builds the dated pattern file in $(LIGA).
#  * The `.eingabe' file depends on $(SRCDIR)/$(WORDLIST) and is produced by
#    $(SPRACHAUSZUG) with languages de-1996, de-CH-1996 and style
#    `morphemgrenzen,einfach'.
#  * The pattern file is produced by make-full-pattern.sh from the file part
#    of the `.eingabe' prerequisite and $(DATADIR)/de-Latf.tr; afterwards
#    $(LIGA)/pattern.8 is appended to the target.
# NOTE(review): several recipe lines (input/output redirection of the
# excerpt command, the step before the script call) are not visible in this
# excerpt -- confirm against the complete file.
391 de-1996_ligaturaufbruch
: $(LIGA
)/de-1996_ligaturaufbruch-
$(DATE
).pat
393 $(LIGA
)/de-1996_ligaturaufbruch.eingabe
: $(SRCDIR
)/$(WORDLIST
)
395 $(SPRACHAUSZUG
) -l
'de-1996,de-CH-1996' -s
'morphemgrenzen,einfach' \
398 $(LIGA
)/de-1996_ligaturaufbruch-
$(DATE
).pat
: $(LIGA
)/de-1996_ligaturaufbruch.eingabe
400 $(SH
) $(SCRIPTDIR
)/trennmuster
/make-full-pattern.sh
$(<F
) $(DATADIR
)/de-Latf.tr
401 $(CAT
) $(LIGA
)/pattern
.8 >> $@
;
403 # Exzerpte mit `sprachauszug.py`
# Excerpt: morpheme boundaries, reformed orthography (de-1996).
# The word list is a prerequisite so that the excerpt is regenerated when
# the list changes, and the recipe reads exactly that file ($<) instead of
# a path relative to the current directory.
# NOTE(review): assumes $(MKDIR) tolerates an existing directory (e.g.
# `mkdir -p'), as all excerpt rules share $(EXZERPTDIR) -- confirm.
$(EXZERPTDIR)/de-1996_morphemgrenzen: $(SRCDIR)/$(WORDLIST)
	$(MKDIR) $(EXZERPTDIR)
	$(SPRACHAUSZUG) -l de-1996 \
	  -s 'morphemgrenzen,einfach' < $< > $@
# Excerpt: morpheme boundaries, traditional orthography (de-1901).
# The word list is a prerequisite so that the excerpt is regenerated when
# the list changes, and the recipe reads exactly that file ($<) instead of
# a path relative to the current directory.
# NOTE(review): assumes $(MKDIR) tolerates an existing directory (e.g.
# `mkdir -p'), as all excerpt rules share $(EXZERPTDIR) -- confirm.
$(EXZERPTDIR)/de-1901_morphemgrenzen: $(SRCDIR)/$(WORDLIST)
	$(MKDIR) $(EXZERPTDIR)
	$(SPRACHAUSZUG) -l de-1901 \
	  -s 'morphemgrenzen,einfach' < $< > $@
# Excerpt: languages `de-1996,de-1996-x-versal' with style
# `standard,morphemisch,hyphenmin3,einfach'.
# The word list is a prerequisite so that the excerpt is regenerated when
# the list changes, and the recipe reads exactly that file ($<) instead of
# a path relative to the current directory.
# NOTE(review): assumes $(MKDIR) tolerates an existing directory (e.g.
# `mkdir -p'), as all excerpt rules share $(EXZERPTDIR) -- confirm.
$(EXZERPTDIR)/de-1996_hyphenmin3: $(SRCDIR)/$(WORDLIST)
	$(MKDIR) $(EXZERPTDIR)
	$(SPRACHAUSZUG) -l 'de-1996,de-1996-x-versal' \
	  -s 'standard,morphemisch,hyphenmin3,einfach' < $< > $@
# Excerpt: languages `de-1996,de-1996-x-versal' with style
# `syllabisch,gesangstext,einfach'.
# The word list is a prerequisite so that the excerpt is regenerated when
# the list changes, and the recipe reads exactly that file ($<) instead of
# a path relative to the current directory.
# NOTE(review): assumes $(MKDIR) tolerates an existing directory (e.g.
# `mkdir -p'), as all excerpt rules share $(EXZERPTDIR) -- confirm.
$(EXZERPTDIR)/de-1996_gesangstext: $(SRCDIR)/$(WORDLIST)
	$(MKDIR) $(EXZERPTDIR)
	$(SPRACHAUSZUG) -l 'de-1996,de-1996-x-versal' \
	  -s 'syllabisch,gesangstext,einfach' < $< > $@