css: remove definition of footer colours
[mina86.com.git] / tools / compilers.py
blob02aa3e77657d8bbc5306ac5c0ce0a4e2e372f6f3
1 # CSS and HTML files pre-processor. -*- coding: utf-8 -*-
2 # Copyright 2016 by Michał Nazarewicz <mina86@mina86.com>
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at
7 # your option) any later version.
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 # Unless required by applicable law or agreed to in writing, software
18 # distributed under the Apache License is distributed on an "AS IS"
19 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
20 # implied. See the Apache License for the specific language governing
21 # permissions and limitations under the License.
23 import base64
24 import os
25 import re
26 import sys
27 import typing
29 sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
30 '..', '..', 'htmlmin')))
31 import htmlmin.parser
# Maps a file extension (with its leading dot, as returned by
# os.path.splitext) to the MIME type, as bytes, that is written into the
# ``data:`` URL when such a file is inlined.  _insert_data() indexes this
# table directly, so an extension missing from it raises KeyError there.
_MIMETYPES: typing.Dict[str, bytes] = {
    '.jpg': b'image/jpeg',
    '.png': b'image/png',
    '.svg': b'image/svg+xml',
}
40 def _encode_base64(mime: bytes, data: bytes) -> bytes:
41 data = base64.standard_b64encode(data)
42 return b'data:%s;base64,%s' % (mime, data)
44 def _encode_string(mime: bytes, data: bytes, css: bool) -> bytes:
45 chars = r'\0-\x1f\x80-\xff%#'
46 if not css:
47 # In HTML mode we don’t have to URL escape greater than sign, apostrophe
48 # and quote in the string since those can be handled via HTML entities.
49 # However, URL encoding is just three-character long while the entities
50 # are at least four.
51 if data.count(b"'") > data.count(b'"'):
52 chars += r' >"'
53 else:
54 chars += r" >'"
56 str_data = re.sub('[{}]'.format(chars),
57 lambda m: '%%%02x' % ord(m.group(0)),
58 data.decode('utf-8').strip())
60 def fmt(data: str, quote: str = '') -> bytes:
61 quote = quote.encode('ascii')
62 return b'%sdata:%s,%s%s' % (quote, mime, data.encode('ascii'), quote)
64 if not css:
65 return fmt(str_data)
67 str_data.replace('\n', '\\n')
68 x, y = [fmt(str_data.replace(q, '\\' + q), q) for q in '\'"']
69 return x if len(x) < len(y) else y
def _insert_data(src_dir: str, path: str, css: bool = False) -> bytes:
    """Read a file and return its contents as a ``data:`` URL.

    Args:
        src_dir: Directory that *path* is relative to.
        path: Path of the file to inline; its extension selects the MIME
            type via ``_MIMETYPES``.
        css: Passed on to ``_encode_string``; true when the result is going
            to be embedded in a style sheet rather than an HTML attribute.

    Returns:
        The base64 ``data:`` URL, except for ``.svg`` files where the
        percent-encoded form is returned instead unless the base64 form is
        strictly shorter.

    Raises:
        KeyError: If the extension of *path* is not in ``_MIMETYPES``.
        OSError: If the file cannot be read.
    """
    _, ext = os.path.splitext(path)
    mime = _MIMETYPES[ext]
    # Use a context manager so the file is closed deterministically rather
    # than whenever the file object happens to be garbage collected.
    with open(os.path.join(src_dir, path), 'rb') as rd:
        data = rd.read()

    encoded = _encode_base64(mime, data)
    if ext != '.svg':
        return encoded

    as_string = _encode_string(mime, data, css)
    return encoded if len(encoded) < len(as_string) else as_string
85 def _map_static(mappings: typing.Dict[str, str], path: str) -> str:
86 return mappings.get(path, path)
def process_css(data: bytes, src_dir: str,
                mappings: typing.Dict[str, str]) -> bytes:
    """Pre-process a style sheet.

    Two substitutions are applied, in this order:

    * every ``DATA<path>`` marker is replaced by a ``data:`` URL holding the
      contents of *path* (relative to *src_dir*), formatted as a CSS string;
    * every ``/d/...`` path is replaced by its entry in *mappings*, if it
      has one.

    Args:
        data: The style sheet.
        src_dir: Directory the ``DATA<...>`` paths are relative to.
        mappings: Replacement paths keyed by the original ``/d/...`` path.

    Returns:
        The processed style sheet.
    """
    def inline(match):
        path = match.group(1).decode('utf-8')
        return _insert_data(src_dir, path, css=True)

    def remap(match):
        original = match.group(0).decode('utf-8')
        return _map_static(mappings, original).encode('utf-8')

    with_data = re.sub(rb'DATA<([^<>]+)>', inline, data)
    return re.sub(rb'/d/[-_a-zA-Z0-9.]*', remap, with_data)
# Type of the attribute list handed to the tag handlers below: a sequence of
# (name, value) pairs in which the value may be None (presumably for
# attributes written without a value -- confirm against the parser).
_Attributes = typing.Sequence[typing.Tuple[str, typing.Optional[str]]]
class HTMLMinParser(htmlmin.parser.HTMLMinParser):
    """htmlmin parser which additionally rewrites attributes and inline CSS.

    On top of the keyword arguments understood by the base class, the
    constructor accepts:

    * ``static_mappings`` -- mapping consulted for every non-empty attribute
      value; a truthy entry replaces the value outright.
    * ``src_dir`` -- if set, ``<img src>`` values are replaced by ``data:``
      URLs built from the file of that name inside this directory.
    * ``self_url`` -- replacement for the ``/self`` prefix of ``href`` and
      ``src`` values.

    ``self._self`` is a stack of ``[counter, url]`` pairs.  An element with
    a ``self`` attribute pushes ``[0, value]`` (and loses the attribute);
    each start tag increments the counter on top of the stack, each end tag
    decrements it and pops the entry once the counter reaches zero.
    """

    @staticmethod
    def _minify_css(data):
        """Collapse white-space in a fragment of CSS.

        Runs of white-space become a single space and white-space around
        ``:``, ``;``, ``,``, ``{`` and ``}`` is removed.  This is purely
        textual and is very likely to break non-trivial rules.
        """
        collapsed = re.sub(r'\s+', ' ', data.strip())
        return re.sub(r'\s*([:;,{}])\s*', r'\1', collapsed)

    def __init__(self, *args: typing.Any, **kw: typing.Any):
        # Strip our own options before the base class sees the keywords.
        self_url = kw.pop('self_url', None)
        self._src_dir = kw.pop('src_dir', None)
        self._static_mappings = kw.pop('static_mappings', None)
        self._self = [[1, self_url]]
        super().__init__(*args, **kw)

    def handle_starttag(self, tag: str, attrs: _Attributes) -> None:
        self._transform_attrs(tag, attrs)
        # Fetch the top entry only now: transforming the attributes may have
        # pushed a new one.
        top = self._self[-1]
        top[0] += 1
        super().handle_starttag(tag, attrs)

    def handle_startendtag(self, tag: str, attrs: _Attributes) -> None:
        self._transform_attrs(tag, attrs)
        # A counter of zero means the entry was pushed just now; drop it
        # again since no end tag will follow.
        if self._self[-1][0] == 0:
            self._self.pop()
        super().handle_startendtag(tag, attrs)

    def handle_endtag(self, tag):
        top = self._self[-1]
        top[0] -= 1
        if top[0] == 0:
            self._self.pop()
        super().handle_endtag(tag)

    def _transform_attrs(self, tag: str, attrs: _Attributes) -> None:
        """Rewrite the attribute list of a tag in place.

        Zero-valued ``x``/``cx``/``y``/``cy`` attributes of selected SVG
        elements are removed, a ``self`` attribute is removed and its value
        pushed onto ``self._self``, and every other attribute with a
        non-empty value is passed through ``_transform_attr``.
        """
        drops_zero_coords = tag in ('path', 'text', 'use', 'rect', 'circle')
        kept = []
        for item in list(attrs):
            attr, value = item
            if (drops_zero_coords and value == '0' and
                    attr in ('x', 'cx', 'y', 'cy')):
                continue
            if attr == 'self':
                self._self.append([0, value])
                continue
            if value:
                item = (attr, self._transform_attr(tag, attr, value))
            kept.append(item)
        # Despite the annotation the list is mutated, so that the caller
        # sees the result.
        typing.cast(typing.List, attrs)[:] = kept

    def _transform_attr(self, tag: str, attr: str, value: str) -> str:
        """Return the rewritten value of a single attribute."""
        mapped = None
        if self._static_mappings:
            mapped = self._static_mappings.get(value)
        if mapped:
            return mapped

        if self._src_dir and tag == 'img' and attr == 'src':
            return _insert_data(self._src_dir, value).decode('utf-8')

        value = re.sub(r'\s+', ' ', value.strip())

        if attr == 'style':
            return self._minify_css(value)

        if attr == 'd' and tag == 'path':
            # In SVG’s D attribute of PATH element the only required
            # white-space is between numbers (except space is not necessary
            # before minus sign).
            return re.sub(r' ?([-a-zA-Z,]) ?', r'\1', value)

        if f'{tag} {attr}' in ('link media', 'area coords', 'meta content'):
            # Comma separated lists, remove unnecessary spaces around commas.
            return re.sub(r' ?, ?', ',', value)

        if attr not in ('href', 'src') or tag == 'base':
            return value

        # Shorten URLs: drop the scheme, then the site’s own host, and
        # finally expand the ‘/self’ placeholder.
        if value.startswith('https://'):
            value = value[len('https:'):]
        if value.startswith('//mina86.com/'):
            value = value[len('//mina86.com'):]
        if value.startswith('/self'):
            self_url = self._self[-1][1]
            assert self_url is not None
            value = self_url + value[len('/self'):]
        return value

    def handle_data(self, data):
        # _tag_stack and _data_buffer are not set anywhere in this class;
        # presumably they are maintained by the htmlmin base class.
        if not (self._tag_stack and self._tag_stack[0][0] == 'style'):
            super().handle_data(data)
            return
        self._data_buffer.append(self._minify_css(data))
191 def _html_tag_re(tag, **kw):
192 is_open = kw.get('open')
193 is_close = kw.get('close')
194 assert not (is_open and is_close)
196 fmt = r'</{tag}>' if is_close else r'<{slash}{tag}\b[^>]*>'
197 return fmt.format(slash='' if is_open else '/?', tag=tag)
# Matches a tag together with white-space next to it which can be dropped.
# Every alternative contains exactly one capture group holding the tag(s);
# the white-space sits outside of the group.  minify_html() replaces each
# match with whichever group matched, which is what removes the white-space.
#
# The alternatives are, in order:
#   * a space before a block-level tag, plus an optional space after it;
#   * a space after a block-level tag;
#   * a space before an opening <text> tag;
#   * a space after a closing </text> tag;
#   * a space before <pre> (optionally directly followed by <code>), plus
#     any blank lines after it;
#   * blank lines after <pre> / <pre><code>;
#   * any white-space before </pre> / </code></pre>, plus an optional space
#     after it;
#   * a space after </pre> / </code></pre>.
#
# The pattern is compiled in verbose mode, hence `space` is an escaped
# space.  `blank` is a line consisting of nothing but spaces and tabs,
# including its terminating new line.
_MINIFY_HTML_RE = re.compile(r'''
      {space} ( {block_tag} ) {space}?
    |         ( {block_tag} ) {space}
    | {space} ( {text_open} )
    |         ( {text_close} ) {space}
    | {space} ( {pre_open} ) {blank}*
    |         ( {pre_open} ) {blank}+
    | \s+     ( {pre_close} ) {space}?
    |         ( {pre_close} ) {space}
'''.format(
    space=r'\ ',
    blank=r'(?:[ \t]*\n)',

    # Opening or closing form of any of the listed elements.
    block_tag=_html_tag_re('(?:%s)' % '|'.join((
        'address', 'article', 'aside', 'base', 'blockquote', 'body', 'br',
        'canvas', 'caption', 'col(?:group)?', 'd[dlt]', 'div',
        'fig(?:caption|ure)', 'footer', 'form', 'h[1-6r]', 'head(?:er)?',
        'hgroup', 'iframe', 'li(?:nk)?', 'main', 'meta', 'nav', 'noscript',
        '[ou]l', 'opt(?:group|ion)', 'p', 'script', 'section', 'style',
        't(?:able|head|body|foot|[dhr]|itle)',

        # SVG elements:
        'svg', 'rect', 'circle', 'g', 'path',
    ))),
    text_open=_html_tag_re('text', open=True),
    text_close=_html_tag_re('text', close=True),
    pre_open='{pre}(?:{code})?'.format(pre=_html_tag_re('pre', open=True),
                                       code=_html_tag_re('code', open=True)),
    pre_close='(?:{code})?{pre}'.format(pre=_html_tag_re('pre', close=True),
                                        code=_html_tag_re('code', close=True)),
), re.VERBOSE)
def minify_html(data: str, **kw: typing.Any) -> str:
    """Minify an HTML document.

    The document is first run through htmlmin using ``HTMLMinParser`` as the
    parser class, then white-space matched by ``_MINIFY_HTML_RE`` next to
    tags is removed.

    Args:
        data: The HTML document.
        **kw: Extra keyword arguments for the ``HTMLMinParser`` constructor;
            they take precedence over the ones htmlmin passes itself.

    Returns:
        The minified document.
    """
    def make_parser(*args: typing.Any, **kwargs: typing.Any) -> HTMLMinParser:
        return HTMLMinParser(*args, **{**kwargs, **kw})

    def pick_group(m):
        # Every alternative of the pattern has a single capture group, so
        # the first group which took part in the match is the tag to keep.
        return next((grp for grp in m.groups() if grp is not None), None)

    options = dict(
        remove_comments=True,
        remove_empty_space=False,
        remove_all_empty_space=False,
        reduce_empty_attributes=True,
        reduce_boolean_attributes=True,
        remove_optional_attribute_quotes=True,
        cls=make_parser,
    )
    minified = htmlmin.minify(data, **options).strip()
    return _MINIFY_HTML_RE.sub(pick_group, minified)