1 # jhbuild - a build script for GNOME 1.x and 2.x
2 # Copyright (C) 2001-2006 James Henstridge
4 # httpcache.py: a simple HTTP cache
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 '''Very simple minded class that can be used to maintain a cache of files
21 downloaded from web servers. It is designed to reduce load on web servers,
22 and draws ideas from feedparser.py. Strategies include:
23 - If a resource has been checked in the last 6 hours, consider it current.
- support gzip content encoding.
25 - send If-Modified-Since and If-None-Match headers when validating a
26 resource to reduce downloads when the file has not changed.
27 - honour Expires headers returned by server. If no expiry time is
28 given, it defaults to 6 hours.
44 import xml
.dom
.minidom
46 raise SystemExit, _('Python xml packages are required but could not be found')
48 def _parse_isotime(string
):
50 return time
.mktime(time
.strptime(string
, '%Y-%m-%dT%H:%M:%S'))
51 tm
= time
.strptime(string
, '%Y-%m-%dT%H:%M:%SZ')
52 return time
.mktime(tm
[:8] + (0,)) - time
.timezone
54 def _format_isotime(tm
):
55 return time
.strftime('%Y-%m-%dT%H:%M:%SZ', time
.gmtime(tm
))
def _parse_date(date):
    '''Convert an RFC 822 date string into seconds since the epoch.

    Returns 0 when the string cannot be parsed, so a caller comparing the
    result against the current time sees it as already in the past.
    '''
    tm = rfc822.parsedate_tz(date)
    if tm is None:
        # parsedate_tz() signals an unparseable date by returning None;
        # handing that to mktime_tz() would raise a TypeError.
        return 0
    return rfc822.mktime_tz(tm)
64 def __init__(self
, uri
, local
, modified
, etag
, expires
=0):
67 self
.modified
= modified
69 self
.expires
= expires
73 cachedir
= os
.path
.join(os
.environ
['XDG_CACHE_HOME'], 'jhbuild')
75 cachedir
= os
.path
.join(os
.environ
['HOME'], '.cache','jhbuild')
77 # default to a 6 hour expiry time.
78 default_age
= 6 * 60 * 60
80 def __init__(self
, cachedir
=None):
82 self
.cachedir
= cachedir
83 if not os
.path
.exists(self
.cachedir
):
84 os
.makedirs(self
.cachedir
)
89 cindex
= os
.path
.join(self
.cachedir
, 'index.xml')
91 document
= xml
.dom
.minidom
.parse(cindex
)
93 return # treat like an empty cache
94 if document
.documentElement
.nodeName
!= 'cache':
96 return # doesn't look like a cache
98 for node
in document
.documentElement
.childNodes
:
99 if node
.nodeType
!= node
.ELEMENT_NODE
: continue
100 if node
.nodeName
!= 'entry': continue
101 uri
= node
.getAttribute('uri')
102 local
= str(node
.getAttribute('local'))
103 if node
.hasAttribute('modified'):
104 modified
= node
.getAttribute('modified')
107 if node
.hasAttribute('etag'):
108 etag
= node
.getAttribute('etag')
111 expires
= _parse_isotime(node
.getAttribute('expires'))
112 # only add to cache list if file actually exists.
113 if os
.path
.exists(os
.path
.join(self
.cachedir
, local
)):
114 self
.entries
[uri
] = CacheEntry(uri
, local
, modified
,
def write_cache(self):
    '''Serialise the cache entries to index.xml in the cache directory.

    The index is a <cache> document element holding one <entry> element
    per cached URI.  The 'modified' and 'etag' attributes are only
    written when the entry has a value for them.
    '''
    cindex = os.path.join(self.cachedir, 'index.xml')

    document = xml.dom.minidom.Document()
    root = document.createElement('cache')
    document.appendChild(root)
    root.appendChild(document.createTextNode('\n'))
    for entry in self.entries.values():
        node = document.createElement('entry')
        node.setAttribute('uri', entry.uri)
        node.setAttribute('local', entry.local)
        # a server need not send either validator, so both may be None.
        if entry.modified is not None:
            node.setAttribute('modified', entry.modified)
        if entry.etag is not None:
            node.setAttribute('etag', entry.etag)
        node.setAttribute('expires', _format_isotime(entry.expires))
        root.appendChild(node)
        # a newline after each entry keeps the index readable.
        root.appendChild(document.createTextNode('\n'))

    # close the index explicitly so the data is flushed to disk even if
    # serialisation fails part way through.
    fp = open(cindex, 'w')
    try:
        document.writexml(fp)
    finally:
        fp.close()
144 def _make_filename(self
, uri
):
145 '''picks a unique name for a new entry in the cache.
147 # get the basename from the URI
148 parts
= urlparse
.urlparse(uri
, allow_fragments
=False)
149 base
= parts
[2].split('/')[-1]
150 if not base
: base
= 'index.html'
155 for uri
in self
.entries
.keys():
156 if self
.entries
[uri
].local
== base
:
163 def load(self
, uri
, nonetwork
=False, age
=None):
164 '''Downloads the file associated with the URI, and returns a local
165 file name for contents.'''
166 # pass file URIs straight through -- no need to cache them
167 parts
= urlparse
.urlparse(uri
)
168 if parts
[0] in ('', 'file'):
170 if sys
.platform
.startswith('win') and uri
[1] == ':':
171 # On Windows, path like c:... are local
176 # is the file cached and not expired?
178 entry
= self
.entries
.get(uri
)
179 if entry
and (age
!= 0 or nonetwork
):
180 if (nonetwork
or now
<= entry
.expires
):
181 return os
.path
.join(self
.cachedir
, entry
.local
)
184 raise RuntimeError(_('file not in cache, but not allowed to check network'))
186 request
= urllib2
.Request(uri
)
188 request
.add_header('Accept-encoding', 'gzip')
191 request
.add_header('If-Modified-Since', entry
.modified
)
193 request
.add_header('If-None-Match', entry
.etag
)
196 response
= urllib2
.urlopen(request
)
198 # get data, and gunzip it if it is encoded
199 data
= response
.read()
200 if gzip
and response
.headers
.get('Content-Encoding', '') == 'gzip':
202 data
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(data
)).read()
206 expires
= response
.headers
.get('Expires')
208 # add new content to cache
209 entry
= CacheEntry(uri
, self
._make
_filename
(uri
),
210 response
.headers
.get('Last-Modified'),
211 response
.headers
.get('ETag'))
212 filename
= os
.path
.join(self
.cachedir
, entry
.local
)
213 open(filename
, 'wb').write(data
)
214 except urllib2
.HTTPError
, e
:
215 if e
.code
== 304: # not modified; update validated
216 expires
= e
.hdrs
.get('Expires')
217 filename
= os
.path
.join(self
.cachedir
, entry
.local
)
222 entry
.expires
= _parse_date(expires
)
223 if entry
.expires
<= now
: # ignore expiry times that have already passed
225 age
= self
.default_age
226 entry
.expires
= now
+ age
229 self
.entries
[uri
] = entry
def load(uri, nonetwork=False, age=None):
    '''Downloads the file associated with the URI, and returns a local
    file name for contents.

    Convenience wrapper that forwards to a shared Cache instance, which
    is created on first use.
    '''
    # rebinding the module-level _cache needs the global declaration;
    # without it the test below raises UnboundLocalError.
    global _cache
    if not _cache:
        _cache = Cache()
    return _cache.load(uri, nonetwork=nonetwork, age=age)