# [limo.git] / compress.py
# commit note: removed main.cfg, not used anywhere
# blob 94624c44cd5fd22319ee5aad0a7232a6ac0a1966
1 from __future__ import with_statement
2 import zlib, cStringIO, os, struct, hashlib
3 import gzip as _gzip
4 from limoutil import *
5 from fcgi.fcgiutil import flatten
def gzip(data, cache_file=None):
    """ Compresses a string, returns the compressed bytes.

    If cache_file is given it is used as a cache: on a miss the compressed
    form is written there, and the result is always read back from the file.
    Without a cache_file, a throw-away file under Settings.cacheDir is used
    and deleted before returning.
    """
    profile("gzip")
    try:
        temporary = cache_file is None
        if temporary:
            # random name so concurrent callers never collide
            random_name = "tmp_" + hashlib.sha1(os.urandom(1024)).hexdigest()
            cache_file = os.path.sep.join([Settings.cacheDir, random_name])
        if not os.path.isfile(cache_file):
            with open(cache_file, "wb") as sink:
                writer = _gzip.GzipFile(fileobj=sink, mode="wb", compresslevel=9)
                try:
                    writer.write(data)
                finally:
                    # GzipFile must be closed explicitly to flush its trailer
                    writer.close()
        with open(cache_file, "rb") as source:
            data = source.read()
        if temporary:
            os.remove(cache_file)
        return data
    finally:
        # second call closes out the timing started above
        profile("gzip")
def manual_gzip(data, cache_file=None):
    """ Compresses a string into a complete gzip stream by hand.

    The previous implementation skimmed two bytes off a zlib-wrapped stream
    (leaving the zlib adler32 trailer embedded) and had the mandatory gzip
    CRC32/ISIZE trailer commented out, so its output was not a valid gzip
    stream.  This version compresses as a raw deflate stream and appends the
    required trailer (RFC 1952).

    cache_file is accepted for signature compatibility and ignored.
    """
    _header = (b"\037\213\010\000"  # magic, compression method (deflate), flags
               b"\000\000\000\000"  # mtime (unset)
               b"\002\377")         # xfl (max compression), os (unknown)
    # negative wbits -> raw deflate: no zlib header, no adler32 trailer
    co = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
    body = co.compress(data) + co.flush()
    # gzip trailer: CRC32 then uncompressed length, both little-endian uint32
    trailer = struct.pack("<LL",
                          zlib.crc32(data) & 0xffffffff,
                          len(data) & 0xffffffff)
    return b''.join([_header, body, trailer])
def generate_file(file, block_size=4096):
    """ Returns a generator that reads a file in blocks, for streaming large files. """
    with open(file, "rb") as handle:
        chunk = handle.read(block_size)
        while chunk:
            yield chunk
            chunk = handle.read(block_size)
def generate_gzip(input, cache_file=None):
    """ Generator: buffer everything from input, then yield it once,
    gzip-compressed.  If the buffered stream already starts with the gzip
    magic bytes it is yielded untouched (pass-through).
    """
    magic = "\037\213"
    collected = cStringIO.StringIO()
    already_compressed = False
    for chunk in flatten(input):
        if chunk in (None, ''):
            continue
        # sniff each chunk until the gzip magic is seen once
        if not already_compressed and len(chunk) >= 2 and str(chunk[:2]) == magic:
            already_compressed = True
        collected.write(chunk)
    if already_compressed:
        yield collected.getvalue()
    else:
        yield gzip(collected.getvalue(), cache_file=cache_file)
def generate_deflate(input, cache_file=None):
    """ Generator: buffer everything from input, then yield it once,
    deflate-compressed via deflate().
    """
    # TODO: pass-through mode
    collected = cStringIO.StringIO()
    for chunk in flatten(input):
        if chunk in (None, ''):
            continue
        collected.write(chunk)
    yield deflate(collected.getvalue(), cache_file=cache_file)
def __disabled__generate_gzip(input, cache_file=None):
    """ Returns a generator that reads from the supplied generator,
    and yields it's output, compressed.

    Disabled streaming variant of generate_gzip: compresses chunk by chunk
    instead of buffering the whole stream, optionally teeing the output to
    cache_file.

    NOTE(review): this function looks broken as written, which is presumably
    why it is disabled:
      - `yield generate_file(cache_file)` yields the generator object itself,
        not the file's bytes (generate_file is a generator function).
      - every `yield _tee(...)` likewise yields a generator object, since
        _tee contains a yield; callers would receive generators, not data.
      - the gzip trailer written at the end packs only the CRC, not the
        required uncompressed length (ISIZE).
    """
    # log("generating gzip (cache_file: %s)" % cache_file)
    if cache_file is not None and os.path.isfile(cache_file):
        # cache hit: replay the previously cached compressed stream
        # log("found existing cache file: os.path.isfile('%s') == %s" % (cache_file, os.path.isfile(cache_file)))
        yield generate_file(cache_file)
    else:
        # log("no cache file, generating a stream, teeing the output to a cachefile at: %s" % cache_file)
        co = zlib.compressobj()
        # magic, type, flags + mtime + xfl, os
        header = "\037\213\010\000"+"\000\000\000\000"+ "\002\377"
        sent_header = False
        # the compress module produces 2 extra bytes in the header that are not supported by browsers
        skim_first_bytes = True
        # keep track of the checksum of the data we send out
        checksum = 0
        len_data = 0
        # if this stream is already compressed we will fall into pass_through mode
        pass_through = False
        # rebinds cache_file from a path string to an open file object (or None)
        cache_file = open(cache_file, "wb") if cache_file is not None else None
        try:
            for text in flatten(input):
                if text is not None and len(text) > 0:
                    if pass_through:
                        yield text
                    else:
                        if len(text) >= 4 and text[:4] == header[:4]: # detect a compression header
                            # log("Detected already compressed stream, flipping to pass trhough mode.")
                            pass_through = True
                            yield text
                            continue
                        # keep a rolling checksum of all data sent
                        checksum = zlib.crc32(text, checksum)
                        len_data += len(text)
                        ztext = co.compress(text)
                        if len(ztext):
                            if not sent_header:
                                yield _tee(cache_file, header)
                                sent_header = True
                            if skim_first_bytes:
                                # drop the 2-byte zlib header from the first compressed chunk
                                skim_first_bytes = False
                                yield _tee(cache_file, ztext[2:])
                            else:
                                yield _tee(cache_file, ztext)
            # flush remaining compressed data, then emit the trailer
            yield _tee(cache_file, co.flush())
            yield _tee(cache_file, struct.pack("<l",checksum))
        finally:
            if cache_file is not None:
                cache_file.close()
131 def _tee(f, data):
132 if f is not None:
133 f.write(data)
134 yield data
def deflate(data, cache_file=None):
    """ Compresses a string with zlib, returns the compressed bytes.

    If cache_file is specified it is used as a cache: a missing file is
    created with the freshly compressed data, an existing one is read back
    verbatim (data is then ignored).  Without a cache_file a throw-away
    file under Settings.cacheDir is written and deleted before returning.
    """
    using_temp_file = False
    if cache_file is None:
        # random name so concurrent callers never collide
        cache_file = os.path.sep.join([Settings.cacheDir, "tmp_" + hashlib.sha1(os.urandom(1024)).hexdigest()])
        using_temp_file = True
    if not os.path.isfile(cache_file):
        with open(cache_file, "wb") as fo:
            # compress directly: the old cStringIO.StringIO(...).read()
            # round-trip was a no-op wrapper
            data = zlib.compress(data)
            # log("writing deflate file: %d bytes" % len(data))
            fo.write(data)
    else:
        with open(cache_file, "rb") as fi:
            data = fi.read()
            # log("deflate: read %d bytes" % len(data))
    if using_temp_file:
        os.remove(cache_file)
    return data