From 41be3f51af0535ea95837d7d8c2c5827aa67a98c Mon Sep 17 00:00:00 2001
From: Guenter Milde <milde@users.sf.net>
Date: Fri, 23 May 2014 00:22:33 +0200
Subject: [PATCH] Python-Skript Update.

---
 skripte/python/abgleich_neueintraege.py    | 12 ++++++++----
 skripte/python/abgleich_sprachvarianten.py | 13 ++++++++++++-
 skripte/python/expand_teilwoerter.py       |  3 +--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/skripte/python/abgleich_neueintraege.py b/skripte/python/abgleich_neueintraege.py
index 7c2a00e..e09bee4 100644
--- a/skripte/python/abgleich_neueintraege.py
+++ b/skripte/python/abgleich_neueintraege.py
@@ -19,6 +19,7 @@
 
 import re, sys, codecs, copy, os
 from werkzeug import WordFile, WordEntry, join_word, toggle_case
+from expand_teilwoerter import expand_wordfile
 
 # Konfiguration
 # -------------
@@ -706,16 +707,19 @@ if __name__ == '__main__':
 
 # `Wortliste` einlesen::
 
-
-    wordfile = WordFile('wortliste-expandiert') # + Teilwort-Entries
-    words = wordfile.asdict()
+    wordfile = WordFile('../../wortliste')
+    words = expand_wordfile(wordfile)
+    
+    # # schon expandierte Liste:
+    # wordfile = WordFile('wortliste-expandiert') # + Teilwort-Entries
+    # words = wordfile.asdict()
+    
 
     neuwortdatei = "zusatzwörter-de-1996-hunspell-compact"
     neueintraege = []
     neueintraege_grossklein = []
     restwoerter = []
 
-
 # Erstellen der neuen Einträge::
 
     for line in open(neuwortdatei):
diff --git a/skripte/python/abgleich_sprachvarianten.py b/skripte/python/abgleich_sprachvarianten.py
index 4901af6..e990515 100755
--- a/skripte/python/abgleich_sprachvarianten.py
+++ b/skripte/python/abgleich_sprachvarianten.py
@@ -72,10 +72,21 @@ if __name__ == '__main__':
         entry = copy.copy(oldentry)
         sprachabgleich(entry)
         if oldentry == entry and u'ss' in entry[0]:
+            for w in entry[1:]:
+                if not w.startswith(u'-'):
+                    break
             try:
-                sprachabgleich(entry, words[entry[0].replace(u'ss', u'ß')])
+                sprachabgleich(entry, words[join_word(w.replace(u'ss', u'ß'))])
             except KeyError:
                 # print entry[0].replace(u'ss', u'ß'), "fehlt"
+                if entry.get('de-1901-x-GROSS'):
+                    wort1901 = entry.get('de-1901-x-GROSS')
+                    wort1901 = wort1901.replace(u'sst', u'ßt')
+                    wort1901 = wort1901.replace(u'ss=', u'ß=')
+                    wort1901 = wort1901.replace(u'-ss', u'-ß')
+                    wort1901 = re.sub(u'ss$', u'ß', wort1901)
+                    if not u'/' in wort1901 and len(wort1901)>3:
+                        print u'%s;-2-;%s;-4-' % (join_word(wort1901), wort1901)
                 pass  # e.g. "Abfahrtßpezialisten"
         if oldentry == entry and u'ß' in entry[0]:
             try:
diff --git a/skripte/python/expand_teilwoerter.py b/skripte/python/expand_teilwoerter.py
index e35b723..e868a54 100644
--- a/skripte/python/expand_teilwoerter.py
+++ b/skripte/python/expand_teilwoerter.py
@@ -184,7 +184,7 @@ def expand_wordfile(wordfile):
         try:
             entries = split_entry(entry)
         except IndexError:  # unterschiedliche Zerlegung je nach Sprache
-            print "problematisch", unicode(entry)
+            # print "problematisch", unicode(entry)
             words[entry[0].lower()] = entry
             continue
         
@@ -209,7 +209,6 @@ if __name__ == '__main__':
     
     outfile = open('wortliste-expandiert', 'w')
 
-
     for entry in sorted(words.values(), key=sortkey_duden):
         outfile.write(str(entry))
         outfile.write('\n')
-- 
2.11.4.GIT