Added kanji frequency selector and linked it into KanjiListEditor.
[jben2_gui.git] / python / jben / dict_downloader.py
blob25483e5646004d81cdc7df4e2955cf1ee437075a
1 # -*- coding: utf-8 -*-
3 from __future__ import absolute_import
5 import urllib2, sys, os, random
6 from jben.preferences import Preferences
7 from jben.dict import DictManager
9 # The following is shamelessly copied from:
10 # http://ftp.monash.edu.au/pub/nihongo/.message
11 #old_static_mirror_list = [
12 # "http://ftp.monash.edu.au/pub/nihongo", # Original FTP archive
13 # "http://japanology.arts.kuleuven.ac.be/mirrors/monash", # Belgium
14 # "ftp://ftp.nrc.ca/pub/packages/nihongo", # Canada (fast site)
15 # "http://www.bcit-broadcast.com/monash", # Canada (http only)
16 # "ftp://ftp.sedl.org/pub/mirrors/nihongo", # US(Texas)
17 # "ftp://ftp.net.usf.edu/pub/monash", # US (Florida)
18 # "ftp://ftp.u-aizu.ac.jp/pub/SciEng/nihongo/ftp.cc.monash.edu.au", # Japan
19 # "ftp://ftp.funet.fi/pub/culture/japan/mirrors/monash", # Finland
20 # "ftp://ftp.uni-duisburg.de/Mirrors/ftp.monash.edu.au/pub/nihongo" # Germany
21 # ]
23 # Based upon checking the mirrors myself, this is the list I'll use
24 # for J-Ben.
static_mirror_list = [
    # Official archive
    # (redirects to ringtail.its.monash.edu.au/pub/nihongo, same IP address)
    "http://ftp.monash.edu.au/pub/nihongo",
    # Up to date mirrors
    "ftp://ftp.nrc.ca/pub/packages/nihongo",  # Canada (fast site)
    "ftp://ftp.net.usf.edu/pub/monash/pub/nihongo",  # US (Florida)
    "ftp://ftp.uni-duisburg.de/Mirrors/ftp.monash.edu.au/pub/nihongo",  # Germany
    # *Probably* up to date (but can't tell since I can't see file mtimes)
    "http://japanology.arts.kuleuven.ac.be/mirrors/monash",  # Belgium
    "http://www.bcit-broadcast.com/monash",  # Canada (http only)
    # old links
    #"ftp://ftp.u-aizu.ac.jp/pub/SciEng/nihongo/ftp.cc.monash.edu.au", # Japan
    #"ftp://ftp.funet.fi/pub/culture/japan/mirrors/monash", # Finland
    # bad links
    #"ftp://ftp.sedl.org/pub/mirrors/nihongo", # US(Texas) - "not available"
]
def get_mirror_list(from_inet=False,
                    mirror="http://ftp.monash.edu.au/pub/nihongo"):
    """Return a list of base URLs mirroring Jim Breen's FTP archive.

    Arguments:
    from_inet -- if true, download "<mirror>/.message" and build the list
                 from the lines in it that start with "."; otherwise
                 return the built-in static list.
    mirror    -- base URL that is asked for ".message" when from_inet is
                 true.

    Returns a new list on every call, so callers may modify the result
    without affecting later calls.  When the online lookup fails with a
    urllib2.URLError the error is written to stderr and an empty list is
    returned.
    """
    # NOTE: although this function does what it says, the mirror list
    # is not dependable for our purposes.  Not all mirrors are up to
    # date, and one mirror appears to be down.  I advise not using
    # from_inet for the time being.

    if not from_inet:
        # Hand out a copy: callers such as download_dict remove entries
        # from the list they receive.
        return list(static_mirror_list)

    mirrors = ["http://ftp.monash.edu.au/pub/nihongo"]
    try:
        f = urllib2.urlopen("%s/%s" % (mirror, ".message"))
        if not f:
            return []
        try:
            for line in f:
                if not line.startswith("."):
                    continue
                # A mirror line is ".<url> <description>"; only the URL
                # is needed, and the description may be absent.
                fields = line[1:].split(None, 1)
                if not fields:
                    continue
                url = fields[0].rstrip('/')
                if url:
                    mirrors.append(url)
        finally:
            f.close()
    except urllib2.URLError:
        # sys.exc_info() is used instead of "except ..., e" / "except ...
        # as e" so the block parses on every Python version.
        sys.stderr.write("%s\n" % (sys.exc_info()[1],))
        return []
    return mirrors
def download_dict(fname):
    """Download the dictionary file `fname` into the writeable dict directory.

    The mirrors returned by get_mirror_list(from_inet=False) are tried in
    random order until one download has been fetched and written to
    "<dict directory>/<fname>".  Each failure is reported on stderr before
    the next mirror is tried.

    Raises ValueError when no mirror is available or every mirror failed
    (the original code failed with the same exception type, raised by
    random.randrange on an empty mirror list).
    """
    # Work on a private copy: the list is reordered here, and
    # get_mirror_list may return the shared module-level list.
    mirrors = list(get_mirror_list(from_inet=False))
    random.shuffle(mirrors)

    dpath = DictManager.get_writeable_dict_directory()
    target_fname = os.path.join(dpath, fname)

    for mirror in mirrors:
        url = "%s/%s" % (mirror, fname)
        try:
            #print "Downloading %s to %s..." % (url, target_fname)
            resp = urllib2.urlopen(url)
            try:
                data = resp.read()
            finally:
                resp.close()
            if not os.path.isdir(dpath):
                # makedirs also creates missing parent directories.
                os.makedirs(dpath)
            ofile = open(target_fname, "wb")
            try:
                ofile.write(data)
            finally:
                ofile.close()
            return
        except Exception:
            # Any failure (network or local) moves on to the next mirror.
            sys.stderr.write("An error occurred; trying the next mirror.\n")
            sys.stderr.write("(%s)\n" % str(sys.exc_info()[1]))

    raise ValueError("Could not download %s: no working mirror left" % fname)
def console_iface():
    """Ask on the console which dictionaries to fetch, then download them.

    Prints a numbered menu to stdout, reads one line of space-separated
    menu numbers from stdin, reports non-numeric or out-of-range entries
    on stderr (and skips them), then calls download_dict once for each
    distinct selected file, in the order first entered.
    """
    # NOTE(review): `p` is not used after load(); presumably the
    # preferences are loaded for their side effects -- confirm.
    p = Preferences()
    p.load()

    sys.stdout.write("The following dictionaries are available:\n")
    dicts = [("edict.gz", "EDICT"),
             #("edict2.gz", "EDICT2"),
             ("JMdict.gz", "JMdict (full)"),
             ("JMdict_e.gz", "JMdict_e (English only)"),
             ("kanjidic.gz", "KANJIDIC"),
             #("kanjd212.gz", "KANJD212"),
             ("kanjidic2.xml.gz", "KANJIDIC2")]
    for i, (fname, desc) in enumerate(dicts):
        sys.stdout.write("\t%d: %s\n" % (i + 1, desc))
    sys.stdout.write("Enter the numbers for the dictionaries you want, "
                     "separated by spaces: ")
    # The prompt has no trailing newline; flush so it is visible before
    # the program blocks on stdin.
    sys.stdout.flush()
    s = sys.stdin.readline()

    fnames = []
    for d in s.split():
        try:
            i = int(d) - 1
        except ValueError:
            sys.stderr.write("Ignoring invalid value %s\n" % d)
            continue
        if not 0 <= i < len(dicts):
            # Report the offending token, not the whole input line.
            sys.stderr.write("Value %s is out of range.\n" % d)
            continue
        fname = dicts[i][0]
        # Entering the same number twice should not download twice.
        if fname not in fnames:
            fnames.append(fname)

    for fname in fnames:
        download_dict(fname)
if __name__ == "__main__":
    # Executed as a script: run the interactive console downloader.
    console_iface()