Vervollständige dreibuchstabige Wörter mit Eszett.
[wortliste.git] / skripte / diff-patgen-input.sh
blobc74aebe52a6a487aef9f7d4b92600e5c42014382
1 #!/bin/sh
2 # -*- coding: utf-8 -*-
4 # Dieses Skript erzeugt Differenzbilder (diff) für die
5 # Patgen-Eingabelisten zwischen zwei angegebenen Commits. Wird nur ein
6 # Commit angegeben, wird als Zielcommit "HEAD" verwendet. Die
7 # Ausgabedateien werden als Dateien
9 # dehyph*-x/<Start-Commit-Hash>-<Ziel-Commit-Hash>.diff
11 # in Verzeichnissen gespeichert, die der jeweiligen Rechtschreibung
12 # entsprechen. Start- und Ziel-Commit können in jeder gültigen
13 # Git-Syntax angegeben werden. Für die Dateinamen werden die
14 # entsprechenden abgekürzten alphanumerischen Commit-Hashes
15 # verwendet.
17 # Aufruf: sh diff-patgen-input.sh <start commit> [<ziel commit>]
20 # Eingabe: <start commit> Ein Start-Commit.
21 # <ziel commit> Ein optionaler Ziel-Commit.
23 # Ausgabe:
24 # dehyphn-x/<hashes>.diff Differenzbild reformierte Rechtschreibung
25 # dehypht-x/<hashes>.diff Differenzbild traditionelle Rechtschreibung
26 # dehyphts-x/<hashes>.diff Differenzbild traditionelle Rechtschreibung
27 # in der Schweiz.
# Print usage information and quit if no commit was given at all.
if test $# -eq 0
then
  echo 'usage: diff-patgen-input <start commit> [<target commit>]'
  echo ''
  echo 'Create diffs for patgen input lists between <start commit> and'
  echo '<target commit> (by default "HEAD") and save them as files'
  echo 'dehyph*-x/<start commit hash>-<target commit hash>.diff in'
  echo 'directories corresponding to the spelling.'
  exit 1
fi

# The first argument is the start commit.  The target commit is the
# second argument; it falls back to HEAD only when that argument is
# absent (an explicitly passed empty string is kept as is).
FROMCOMMIT=$1
TOCOMMIT=${2-HEAD}
# Resolve the start commit.  'git log' is asked for '%ci-%H', i.e. the
# committer date followed by '-' and the full hash.  The substrings
# below pick the date (offset 0, 10 characters), the hour, minute and
# second fields (offsets 11, 14, 17) and the hash (offset 26 onwards).
# The commit argument is quoted so that an empty string is rejected by
# git instead of silently selecting HEAD.
GITDATA=$(git log -1 --format=%ci-%H "$FROMCOMMIT" --)
FROMDATE=${GITDATA:0:10}+${GITDATA:11:2}-${GITDATA:14:2}-${GITDATA:17:2}
FROMHASH=${GITDATA:26}
if test -z "$FROMHASH"
then
  echo 'diff-patgen-input.sh: error identifying start commit hash: ' "$FROMCOMMIT" >&2
  exit 1
fi

# Resolve the target commit in the same way.
GITDATA=$(git log -1 --format=%ci-%H "$TOCOMMIT" --)
TODATE=${GITDATA:0:10}+${GITDATA:11:2}-${GITDATA:14:2}-${GITDATA:17:2}
TOHASH=${GITDATA:26}
if test -z "$TOHASH"
then
  echo 'diff-patgen-input.sh: error identifying target commit hash: ' "$TOCOMMIT" >&2
  exit 1
fi
# Change to the repository root directory; all paths used below are
# relative to it.  The double quotes are intentional: without them an
# empty result from git would leave cd without any argument.  Abort if
# the directory cannot be entered, so nothing is written elsewhere.
cd "$(git rev-parse --show-toplevel)" || exit 1

# Write all output to a single top-level directory.
OUTPUTDIR="+++diff-patgen-input+++"
# Extract the tree of a commit into the directory
# "${OUTPUTDIR}/<commit date>-<commit>" unless that directory already
# exists.
#
# Globals:   OUTPUTDIR (read)
# Arguments: $1 - commit, handed to 'git archive' and used in the
#                 directory name
#            $2 - date string used in the directory name
get_working_copy() {
  typeset commit=$1 commitdate=$2
  typeset commitdir=${OUTPUTDIR}/${commitdate}-${commit}
  if test ! -d "$commitdir"
  then
    # The archive prefix makes tar unpack straight into $commitdir.
    git archive --format=tar --prefix="$commitdir/" "$commit" | tar xf -
  fi
}
# Build a patgen input list inside the extracted tree of a commit by
# running 'make <list>' there, unless the list file already exists.
# A progress line naming the abbreviated commit and the list is printed
# in either case.
#
# Globals:   OUTPUTDIR (read)
# Arguments: $1 - commit (names the tree directory)
#            $2 - date string (names the tree directory)
#            $3 - make target / path of the list relative to the tree
create_patgen_list() {
  typeset commit=$1 commitdate=$2 patgenlist=$3
  typeset commitdir=${OUTPUTDIR}/${commitdate}-${commit}
  echo "Making ${commit:0:7} file $patgenlist."
  if test ! -e "$commitdir/$patgenlist"
  then
    # 'make -C $commitdir $patgenlist' doesn't work reliably on Git
    # for Windows shell, hence the subshell with an explicit cd.
    (cd "$commitdir" && make "$patgenlist" > /dev/null)
  fi
}
# Create the patgen input list "<dehyph>/words.hyphenated.<spell>" for
# both commits, write their diff to
# "${OUTPUTDIR}/<dehyph>/<from7>-<to7>.diff" (abbreviated hashes) and
# run skripte/diff-patgen-input.awk on that diff file.
# NOTE(review): the awk script presumably produces the '.added',
# '.removed' and '.hyph' files read by count_differences -- confirm.
#
# Globals:   OUTPUTDIR (read)
# Arguments: $1 - start commit      $2 - start commit date string
#            $3 - target commit     $4 - target commit date string
#            $5 - list directory    $6 - spelling suffix of the list
diff_patgen_list() {
  typeset fromcommit=$1 fromcommitdate=$2 tocommit=$3 tocommitdate=$4
  typeset dehyph=$5 spell=$6
  typeset fromcommitdir=${OUTPUTDIR}/${fromcommitdate}-${fromcommit}
  typeset tocommitdir=${OUTPUTDIR}/${tocommitdate}-${tocommit}
  typeset patgenlist=${dehyph}/words.hyphenated.${spell}
  typeset difffile=${fromcommit:0:7}-${tocommit:0:7}.diff
  create_patgen_list "$fromcommit" "$fromcommitdate" "$patgenlist"
  create_patgen_list "$tocommit" "$tocommitdate" "$patgenlist"
  mkdir -p "${OUTPUTDIR}/${dehyph}"
  diff "$fromcommitdir/$patgenlist" "$tocommitdir/$patgenlist" > "${OUTPUTDIR}/${dehyph}/${difffile}"
  gawk -f skripte/diff-patgen-input.awk -v ftr=daten/german.tr "${OUTPUTDIR}/${dehyph}/${difffile}"
}
# Append one row to the summary table: the name of the spelling variety
# followed by the line counts of the '.added', '.removed' and '.hyph'
# files that belong to the diff of the two commits.
#
# Globals:   OUTPUTDIR (read)
# Arguments: $1 - start commit      $2 - target commit
#            $3 - list directory    $4 - label printed in the first column
#            $5 - summary file the row is appended to
count_differences() {
  typeset fromcommit=$1 tocommit=$2 dehyph=$3 variety=$4 summaryfile=$5
  typeset difffile=${OUTPUTDIR}/${dehyph}/${fromcommit:0:7}-${tocommit:0:7}.diff
  typeset n_added n_removed n_hyph
  # Read from stdin so wc prints the bare count without a file name.
  n_added=$(wc -l < "${difffile}.added")
  n_removed=$(wc -l < "${difffile}.removed")
  n_hyph=$(wc -l < "${difffile}.hyph")
  # Arithmetic expansion drops the leading blanks some wc implementations
  # emit and turns an empty count (unreadable file) into 0, so the
  # columns can never shift.
  printf " %-21s %11d %8d %10d\n" "${variety}" "$((n_added))" "$((n_removed))" "$((n_hyph))" >> "${summaryfile}"
}
# Report which commits are being compared.
echo "Diff'ing patgen input files."
printf "from: %7s %10s %s\n" "${FROMHASH:0:7}" "${FROMDATE:0:10}" "$FROMCOMMIT"
printf "to: %7s %10s %s\n" "${TOHASH:0:7}" "${TODATE:0:10}" "$TOCOMMIT"

# Get commit's working copies.
get_working_copy "$FROMHASH" "$FROMDATE"
get_working_copy "$TOHASH" "$TODATE"

# Diff patgen lists.
diff_patgen_list "$FROMHASH" "$FROMDATE" "$TOHASH" "$TODATE" dehypht-x trad
diff_patgen_list "$FROMHASH" "$FROMDATE" "$TOHASH" "$TODATE" dehyphts-x swiss
diff_patgen_list "$FROMHASH" "$FROMDATE" "$TOHASH" "$TODATE" dehyphn-x refo

# Write summary file: a two-line table header followed by one row per
# spelling variety.
SUMMARYFILE=${OUTPUTDIR}/CHANGES.table.txt
echo " Rechtschreibung hinzugefügt entfernt korrigiert" > "$SUMMARYFILE"
echo " ---------------------------------------------------------------" >> "$SUMMARYFILE"
count_differences "$FROMHASH" "$TOHASH" dehypht-x "traditionell (DE, AT)" "$SUMMARYFILE"
count_differences "$FROMHASH" "$TOHASH" dehyphts-x "traditionell (CH)" "$SUMMARYFILE"
count_differences "$FROMHASH" "$TOHASH" dehyphn-x "reformiert" "$SUMMARYFILE"