don't treat c:... as URL on Windows
[jhbuild.git] / jhbuild / utils / httpcache.py
blob 17a6fb76f8b52b9dca3e195b970d9ef75deb3c25
# jhbuild - a build script for GNOME 1.x and 2.x
# Copyright (C) 2001-2006 James Henstridge
#
#   httpcache.py: a simple HTTP cache
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
'''Very simple-minded class that can be used to maintain a cache of files
downloaded from web servers.  It is designed to reduce load on web servers,
and draws ideas from feedparser.py.  Strategies include:
    - If a resource has been checked in the last 6 hours, consider it current.
    - Support gzip transfer encoding.
    - Send If-Modified-Since and If-None-Match headers when validating a
      resource to reduce downloads when the file has not changed.
    - Honour Expires headers returned by the server.  If no expiry time is
      given, it defaults to 6 hours.
'''
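
# Sketch of the on-disk layout this module maintains (see read_cache() and
# write_cache() below).  Downloaded files are stored next to an index.xml in
# the cache directory, e.g. ~/.cache/jhbuild/:
#
#   <cache>
#     <entry uri="http://example.org/foo.modules" local="foo.modules"
#            modified="..." etag="..." expires="2006-01-01T00:00:00Z"/>
#   </cache>
#
# The 'modified' and 'etag' attributes are optional; 'expires' uses the ISO
# format produced by _format_isotime().  The URI and file name shown here are
# purely illustrative.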
import os
import sys
import urllib2
import urlparse
import time
import rfc822
import StringIO
try:
    import gzip
except ImportError:
    gzip = None

try:
    import xml.dom.minidom
except ImportError:
    raise SystemExit, _('Python xml packages are required but could not be found')
def _parse_isotime(string):
    if string[-1] != 'Z':
        return time.mktime(time.strptime(string, '%Y-%m-%dT%H:%M:%S'))
    tm = time.strptime(string, '%Y-%m-%dT%H:%M:%SZ')
    return time.mktime(tm[:8] + (0,)) - time.timezone

def _format_isotime(tm):
    return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(tm))
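
# Illustrative examples of the helpers above (UTC, purely for orientation):
#   _format_isotime(0)                      -> '1970-01-01T00:00:00Z'
#   _parse_isotime('1970-01-01T00:00:00Z')  -> 0.0
# Timestamps without the trailing 'Z' are interpreted as local time instead.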
def _parse_date(date):
    if not date:
        # defensive: tolerate a missing header (e.g. no Expires at all)
        return 0
    tm = rfc822.parsedate_tz(date)
    if tm:
        return rfc822.mktime_tz(tm)
    return 0
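
# Illustrative example: _parse_date('Thu, 01 Jan 1970 00:00:00 GMT') returns
# 0.0; a missing or unparsable date falls back to 0, i.e. already expired.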
class CacheEntry:
    def __init__(self, uri, local, modified, etag, expires=0):
        self.uri = uri
        self.local = local
        self.modified = modified
        self.etag = etag
        self.expires = expires
class Cache:
    try:
        cachedir = os.path.join(os.environ['XDG_CACHE_HOME'], 'jhbuild')
    except KeyError:
        cachedir = os.path.join(os.environ['HOME'], '.cache', 'jhbuild')

    # default to a 6 hour expiry time.
    default_age = 6 * 60 * 60

    def __init__(self, cachedir=None):
        if cachedir:
            self.cachedir = cachedir
        if not os.path.exists(self.cachedir):
            os.makedirs(self.cachedir)
        self.entries = {}
    def read_cache(self):
        self.entries = {}
        cindex = os.path.join(self.cachedir, 'index.xml')
        try:
            document = xml.dom.minidom.parse(cindex)
        except:
            return # treat like an empty cache
        if document.documentElement.nodeName != 'cache':
            document.unlink()
            return # doesn't look like a cache

        for node in document.documentElement.childNodes:
            if node.nodeType != node.ELEMENT_NODE: continue
            if node.nodeName != 'entry': continue
            uri = node.getAttribute('uri')
            local = str(node.getAttribute('local'))
            if node.hasAttribute('modified'):
                modified = node.getAttribute('modified')
            else:
                modified = None
            if node.hasAttribute('etag'):
                etag = node.getAttribute('etag')
            else:
                etag = None
            expires = _parse_isotime(node.getAttribute('expires'))
            # only add to cache list if file actually exists.
            if os.path.exists(os.path.join(self.cachedir, local)):
                self.entries[uri] = CacheEntry(uri, local, modified,
                                               etag, expires)
        document.unlink()
    def write_cache(self):
        cindex = os.path.join(self.cachedir, 'index.xml')

        document = xml.dom.minidom.Document()
        document.appendChild(document.createElement('cache'))
        node = document.createTextNode('\n')
        document.documentElement.appendChild(node)
        for uri in self.entries.keys():
            entry = self.entries[uri]
            node = document.createElement('entry')
            node.setAttribute('uri', entry.uri)
            node.setAttribute('local', entry.local)
            if entry.modified:
                node.setAttribute('modified', entry.modified)
            if entry.etag:
                node.setAttribute('etag', entry.etag)
            node.setAttribute('expires', _format_isotime(entry.expires))
            document.documentElement.appendChild(node)

            node = document.createTextNode('\n')
            document.documentElement.appendChild(node)

        document.writexml(open(cindex, 'w'))
        document.unlink()
    def _make_filename(self, uri):
        '''picks a unique name for a new entry in the cache.
        Very simplistic.'''
        # get the basename from the URI
        parts = urlparse.urlparse(uri, allow_fragments=False)
        base = parts[2].split('/')[-1]
        if not base: base = 'index.html'

        is_unique = False
        while not is_unique:
            is_unique = True
            for entry in self.entries.values():
                if entry.local == base:
                    is_unique = False
                    break
            if not is_unique:
                base = base + '-'
        return base
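
    # Illustrative example of the naming scheme above: a URI ending in
    # "/foo.modules" is stored as "foo.modules"; if another cache entry already
    # uses that name, dashes are appended until it is unique ("foo.modules-",
    # "foo.modules--", ...).  The name "foo.modules" is only an example.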
    def load(self, uri, nonetwork=False, age=None):
        '''Downloads the file associated with the URI, and returns a local
        file name for contents.'''
        # pass file URIs straight through -- no need to cache them
        parts = urlparse.urlparse(uri)
        if parts[0] in ('', 'file'):
            return parts[2]
        if sys.platform.startswith('win') and len(uri) > 1 and uri[1] == ':':
            # on Windows, paths like c:... are local files, not URLs
            return uri
        now = time.time()

        # is the file cached and not expired?
        self.read_cache()
        entry = self.entries.get(uri)
        if entry and (age != 0 or nonetwork):
            if (nonetwork or now <= entry.expires):
                return os.path.join(self.cachedir, entry.local)

        if nonetwork:
            raise RuntimeError(_('file not in cache, but not allowed to check network'))
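
        # No cached copy, or it may be stale: fetch it, sending
        # If-Modified-Since / If-None-Match validators when a previous entry
        # exists so the server can answer 304 Not Modified instead of
        # resending the body.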
        request = urllib2.Request(uri)
        if gzip:
            request.add_header('Accept-encoding', 'gzip')
        if entry:
            if entry.modified:
                request.add_header('If-Modified-Since', entry.modified)
            if entry.etag:
                request.add_header('If-None-Match', entry.etag)
        try:
            response = urllib2.urlopen(request)

            # get data, and gunzip it if it is encoded
            data = response.read()
            if gzip and response.headers.get('Content-Encoding', '') == 'gzip':
                try:
                    data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
                except:
                    data = ''

            expires = response.headers.get('Expires')

            # add new content to cache
            entry = CacheEntry(uri, self._make_filename(uri),
                               response.headers.get('Last-Modified'),
                               response.headers.get('ETag'))
            filename = os.path.join(self.cachedir, entry.local)
            open(filename, 'wb').write(data)
        except urllib2.HTTPError, e:
            if e.code == 304: # not modified; update validated
                expires = e.hdrs.get('Expires')
                filename = os.path.join(self.cachedir, entry.local)
            else:
                raise

        # set expiry date
        entry.expires = _parse_date(expires)
        if entry.expires <= now: # ignore expiry times that have already passed
            if age is None:
                age = self.default_age
            entry.expires = now + age

        # save cache
        self.entries[uri] = entry
        self.write_cache()
        return filename
_cache = None
def load(uri, nonetwork=False, age=None):
    '''Downloads the file associated with the URI, and returns a local
    file name for contents.'''
    global _cache
    if not _cache: _cache = Cache()
    return _cache.load(uri, nonetwork=nonetwork, age=age)
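
# A minimal usage sketch, not part of the original module: run the file
# directly to pull each URI named on the command line through the cache and
# print the resulting local file name.  The URIs are whatever the caller
# supplies; only the module-level load() above is assumed.
if __name__ == '__main__':
    for _uri in sys.argv[1:]:
        try:
            print '%s -> %s' % (_uri, load(_uri))
        except Exception, exc:
            print '%s: %s' % (_uri, exc)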