2 # Stephen Paulger <stephen.paulger@gmail.com>
6 # python WordMirror.py haus
# NB: umlauts are not displayed correctly in the command-line test output.
# This is not a problem if you access the individual string objects rather
# than printing the result dict directly.
13 # result = WordMirror.WordMirrorNounInflection("Ecke")
14 # print result["Gender"]
15 # print result["Indefinite Article"]["Genitive"]
17 from urllib2
import urlopen
def WordMirrorNounInflection(noun):
    """Fetch the WordMirror inflection page for a German noun and parse it.

    The noun is URL-escaped before it is placed in the query string, so
    nouns containing umlauts or spaces produce a valid request.

    Args:
        noun: The noun to look up (text or UTF-8 encoded bytes).

    Returns:
        A dict.  Entries read from the introductory table map a label to
        the raw cell text; entries read from the main table map a column
        heading to a dict keyed by "Nominative", "Accusative", "Dative"
        and "Genitive".

    Raises:
        Whatever ``urlopen`` raises when the page cannot be fetched.
    """
    # Local import keeps the block usable on both Python 2 and Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    if not isinstance(noun, bytes):
        noun = noun.encode("utf-8")
    url = ("http://www.wordmirror.com/inflect.php"
           "?lang=en&lp=de_en&q=%s&version=0&pos=noun" % quote(noun))

    response = urlopen(url)
    try:
        webdoc = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # On Python 3 read() returns bytes; the parser below works on text.
    # NOTE(review): the page encoding is assumed to be UTF-8 -- confirm.
    if not isinstance(webdoc, str):
        webdoc = webdoc.decode("utf-8", "replace")

    return _parseWordMirrorLines(webdoc.splitlines())


def _parseWordMirrorLines(doclines):
    """Extract the introductory and main inflection tables from page lines.

    NOTE(review): the loop structure and counters are reconstructed from
    the surviving statements and their inline comments; the assumed page
    layout (each useful heading followed by its four case cells, in the
    order listed in ``cases``) should be verified against a live page.
    """
    cases = ["Nominative", "Accusative", "Dative", "Genitive"]

    inflect = {}
    readingIntro = False
    introKey = None
    mainTableMarkers = 0   # lines seen so far that mention "main table"
    mainTableHeading = 0   # table headings seen inside the main table
    headingKey = None
    caseNum = 0

    for line in doclines:
        line = line.strip()

        # Introduction begins with <!-- introductory table -->
        # and ends with <!-- end of introductory table -->
        if "introductory table" in line:
            readingIntro = not readingIntro

        if readingIntro and line.endswith(":"):
            # Label cell: text after the tag, without the trailing colon.
            introKey = line[line.find(">") + 1:-1]
        elif readingIntro and "<td>" in line and introKey is not None:
            inflect[introKey] = line[line.find("<td>") + 4:]

        # Main table begins with <!-- main table -->
        # and ends with <!-- end of main table -->
        # except the beginning line is duplicated so we must ignore the first.
        if "main table" in line:
            mainTableMarkers += 1

        # Exit the loop after the main table is read.
        if mainTableMarkers > 2:
            break
        if mainTableMarkers < 2:
            continue

        if "tableheading" in line:
            # We read the table headings; the first one isn't any use to us.
            mainTableHeading += 1
            if mainTableHeading < 2:
                continue
            # Strip the opening tag and the five-character closing tag.
            headingKey = line[line.find(">") + 1:-5].replace("<br>", " ")
            inflect[headingKey] = {}
            caseNum = 0
        elif (headingKey is not None and line.endswith("</td>")
              and caseNum < len(cases)):
            inflect[headingKey][cases[caseNum]] = line[line.find(">") + 1:-5]
            caseNum += 1

    return inflect
def getParadigms(dixFile, wordType):
    """Return the paradigm definitions of one word type from a .dix file.

    Selects the same nodes as the XPath expression
    ``/dictionary/pardefs/pardef[substring-after(@n,"__")="<wordType>"]``:
    every ``<pardef>`` that is a child of a ``<pardefs>`` child of the
    ``<dictionary>`` root and whose ``n`` attribute, after its first
    ``"__"``, equals *wordType*.

    Args:
        dixFile: Path (or open file object) of the dictionary to read.
        wordType: Paradigm suffix to match, e.g. ``"n"``.

    Returns:
        A list of ``xml.dom.minidom`` element nodes in document order;
        empty when nothing matches.
    """
    from xml.dom import minidom

    # Parse the file the caller asked for rather than a fixed filename.
    root = minidom.parse(dixFile).documentElement
    if root.tagName != "dictionary":
        return []

    def childElements(parent, tagName):
        # Direct element children only, mirroring the "/" steps of the path.
        return [node for node in parent.childNodes
                if node.nodeType == node.ELEMENT_NODE
                and node.tagName == tagName]

    matches = []
    for pardefs in childElements(root, "pardefs"):
        for pardef in childElements(pardefs, "pardef"):
            # partition()[2] is "" when "__" is absent, like substring-after.
            if pardef.getAttribute("n").partition("__")[2] == wordType:
                matches.append(pardef)
    return matches
88 def matchParadigm(dixFile
, wordType
, paradigm
):
89 pardefs
= getParadigms(dixFile
, wordType
)
90 for pardef
in pardefs
:
94 if __name__
== "__main__":
96 inflect
= WordMirrorNounInflection(sys
.argv
[1])