# Translated using Weblate (Bulgarian)
# [phpmyadmin.git] / scripts / convert-htmldoc-to-rst.py
# blob e2b536a3fb9f6fc3b1f350073b237e8504da35c0
1 from bs4 import BeautifulSoup, NavigableString, Comment
2 import sys
3 import re
4 import textwrap
# Runs of HTML inter-word whitespace; each run is collapsed to one space.
# Tab is included because HTML treats it like space and newline, and a
# stray tab would otherwise be expanded by textwrap later on.
whitespace = re.compile(r'[ \t\r\n]+')
def get_id_from_cfg(text):
    '''
    Formats anchor ID from config option.

    The leading "$cfg['" and trailing "']" are removed when present, any
    "[$i]" server index is dropped and the remaining "']['" separated
    keys are joined with underscores behind a "cfg_" prefix, e.g.
    "$cfg['Servers'][$i]['host']" becomes "cfg_Servers_host".

    Text without the $cfg wrapper is used as is, so "foo" gives "cfg_foo".
    '''
    prefix = "$cfg['"
    suffix = "']"
    if text.startswith(prefix):
        text = text[len(prefix):]
    if text.endswith(suffix):
        text = text[:-len(suffix)]
    text = text.replace('[$i]', '')
    parts = text.split("']['")
    return 'cfg_%s' % '_'.join(parts)
def _print_paragraph(parts):
    '''
    Prints collected inline fragments as one wrapped paragraph followed
    by a blank line.
    '''
    # Python 2 script: text is encoded explicitly before printing
    print(textwrap.fill(''.join(parts).strip()).encode('utf-8'))
    print('')


def format_content(tag, ignore_links = False, skip = (), document_mode = False, escape = True):
    '''
    Parses inline html content.

    Iterates over the children of tag and converts inline markup (links,
    code, strong, em, abbr, sup, sub, span) to reST.

    ignore_links  -- emit only the link text for <a href> elements
    skip          -- tag names which are silently ignored
    document_mode -- children which are not inline markup are passed to
                     parse_block; text collected so far is printed as a
                     paragraph first and remaining text is printed at the
                     end (it is still returned as well)
    escape        -- backslash-escape '*' and '_' in text; disabled for
                     the content of <code>, as reST does not interpret
                     backslashes inside inline literals

    Returns the converted text with surrounding whitespace stripped.
    Raises Exception('Unknown tag') for an unsupported child outside of
    document mode.
    '''
    out = []
    for item in tag:
        # skip comments; Comment is a NavigableString subclass in bs4, so
        # it has to be tested before the generic text branch or the
        # comment body would end up in the output
        if isinstance(item, Comment):
            continue

        if isinstance(item, NavigableString):
            text = whitespace.sub(' ', item.string)
            if text != '':
                if escape:
                    text = text.replace('*', '\\*').replace('_', '\\_')
                out.append(text)
            continue

        name = item.name

        # skip breaks (they are mostly invalid anyway) and images
        if name in ('br', 'img'):
            continue

        if name in skip:
            continue

        if name == 'a' and 'href' in item.attrs:
            content = format_content(item)
            if ignore_links:
                out.append(content)
                continue
            href = item.attrs['href']
            # startswith also copes with an empty href
            if href.startswith('#'):
                target = href[1:]
                if content == 'details' or 'see' in content:
                    out.append('see :ref:`%s`' % target)
                else:
                    out.append(':ref:`%s`' % target)
            else:
                out.append('`%s <%s>`_' % (content, href))
            continue
        if name == 'code':
            out.append('``%s``' % format_content(item, escape = False))
            continue
        if name == 'strong' or (name == 'span' and 'class' in item.attrs and 'important' in item.attrs['class']):
            out.append('**%s**' % format_content(item))
            continue
        if name == 'em':
            out.append('*%s*' % format_content(item))
            continue
        if name == 'abbr':
            content = format_content(item)
            title = item.attrs.get('title')
            if title is None:
                # nothing to expand, keep just the text
                out.append(content)
            else:
                out.append(':abbr:`%s (%s)`' % (content, title))
            continue
        if name == 'sup':
            out.append(':sup:`%s`' % format_content(item))
            continue
        if name == 'sub':
            out.append(':sub:`%s`' % format_content(item))
            continue
        if name == 'span':
            # plain span is transparent, so it inherits the escaping mode
            out.append(format_content(item, escape = escape))
            continue

        if document_mode:
            # block level child: flush pending text, then convert the block
            _print_paragraph(out)
            out = []
            parse_block(item)
            continue

        # Diagnostics go to stderr, stdout may be bound to a generated file
        sys.stderr.write('%s\n' % item.name)
        sys.stderr.write('%s\n' % item.attrs)
        raise Exception('Unknown tag')
    if document_mode:
        _print_paragraph(out)
    ret = ''.join(out)
    return ret.strip()
def print_id(tag):
    '''
    Prints reST label for the tag.

    Emits ".. _<id>:" followed by a blank line when tag.get('id') is not
    None, otherwise prints nothing.
    '''
    tagid = tag.get('id')
    if tagid is None:
        return
    print('.. _%s:' % tagid)
    print('')
# Substrings of a <dt> id which mark the entry as a configuration option
_CFG_ID_MARKERS = (
    'cfg', 'servers', 'control', 'bookmark', 'table', 'pmadb', 'relation',
    'col_com', 'history', 'recent', 'tracking', 'designer', 'Arbitrary',
    'userconfig',
)


def _emit(text):
    '''
    Prints document text encoded as UTF-8.
    '''
    # Python 2 script: stdout may be a plain file opened for an <h2>
    # section, so text is encoded explicitly instead of relying on the
    # implicit ASCII codec.
    print(text.encode('utf-8'))


def _parse_heading(tag, underline, inline = False):
    '''
    Prints label, title and underline for a heading tag.
    '''
    print_id(tag)
    if inline:
        title = format_content(tag, True)
    else:
        title = tag.text
    _emit(title)
    print(underline * len(title))
    print('')


def _parse_list(tag):
    '''
    Prints ul/ol as reST list including nested ul, pre and p children.
    '''
    if tag.name == 'ul':
        header = '*'
    else:
        header = '#.'
    # continuation lines are aligned with the text after the marker
    indent = ' ' * (len(header) + 1)
    joiner = '\n%s' % indent
    # nested bullets are written as indent + '* ', text starts 2 further
    nested_joiner = '\n%s' % (indent + '  ')

    for li in tag:
        # skip comments
        if isinstance(li, Comment):
            continue

        # skip empty
        if isinstance(li, NavigableString) and li.string.strip() == '':
            continue

        if li.name != 'li':
            raise Exception('UL contains %s' % li.name)
        text = format_content(li, skip = ('ul', 'li', 'pre', 'p'))
        wrapped = joiner.join(textwrap.wrap(text))
        print('%s %s' % (header, wrapped.encode('utf-8')))
        for item in li:
            if isinstance(item, NavigableString):
                # Already handled above
                continue
            if item.name == 'ul':
                print('')
                for lii in item:
                    # skip comments
                    if isinstance(lii, Comment):
                        continue
                    if isinstance(lii, NavigableString) and lii.string.strip() == '':
                        continue
                    if lii.name != 'li':
                        raise Exception('UL contains %s' % lii.name)
                    text = format_content(lii)
                    wrapped = nested_joiner.join(textwrap.wrap(text))
                    print('%s* %s' % (indent, wrapped.encode('utf-8')))
                print('')
            elif item.name == 'pre':
                print('')
                print(indent + '.. code-block:: none')
                print('')
                for line in item.text.splitlines():
                    print('%s  %s' % (indent, line.strip().encode('utf-8')))
                print('')
                print('')
            elif item.name == 'p':
                text = format_content(item)
                # every wrapped line needs the indent to stay in the item
                _emit(textwrap.fill(text, initial_indent = indent, subsequent_indent = indent))
                print('')
    # blank line terminates the list
    print('')


def _parse_term(dt):
    '''
    Prints dt either as config option directive(s) or as a heading.
    '''
    dt_id = dt.get('id')
    is_cfg = dt_id is not None and any(marker in dt_id for marker in _CFG_ID_MARKERS)

    if not is_cfg:
        # Print all IDs
        print_id(dt)
        for subtag in dt:
            if not isinstance(subtag, NavigableString) and subtag.get('id') is not None:
                print_id(subtag)
        text = format_content(dt)
        _emit(text)
        print('-' * len(text))
        print('')
        return

    # Extract all IDs and the option texts; <br> separates the options
    ids = [dt_id]
    options = []
    text = ''
    for subtag in dt:
        # comments are NavigableString as well, keep them out of the text
        if isinstance(subtag, Comment):
            continue
        if isinstance(subtag, NavigableString):
            text += subtag.string
            continue
        if subtag.get('id') is not None:
            ids.append(subtag.get('id'))
        if subtag.name == 'span':
            text += subtag.text
        elif subtag.name == 'br':
            options.append(text)
            text = ''
    if text != '':
        options.append(text)

    # Split each option to name and type
    config_options = []
    generated = set()
    for option in options:
        fields = option.split(None, 1)
        if not fields:
            continue
        optname = fields[0]
        if len(fields) > 1:
            opttype = fields[1].strip()
        else:
            opttype = ''
        config_options.append((optname, opttype))
        generated.add(get_id_from_cfg(optname))

    # Labels are needed only for IDs which do not match an option name;
    # they are printed once each, in document order
    seen = set()
    for anchor in ids:
        if anchor in generated or anchor in seen:
            continue
        seen.add(anchor)
        print('.. _%s:' % anchor)

    for optname, opttype in config_options:
        _emit('.. config:option:: %s' % optname)
        print('')
        _emit(' :type: %s' % opttype)
        print(' :default:')
        print('')


def _parse_definition_list(tag):
    '''
    Prints dl; dt is handled as term, dd as document content.
    '''
    for li in tag:
        # skip comments
        if isinstance(li, Comment):
            continue

        # skip empty
        if isinstance(li, NavigableString) and li.string.strip() == '':
            continue

        if li.name == 'dt':
            _parse_term(li)
        elif li.name == 'dd':
            format_content(li, document_mode = True)
        else:
            sys.stderr.write('%s\n' % li.name)
            sys.stderr.write('%s\n' % li.attrs)
            raise Exception('Unknown tag')


def _parse_pre(tag):
    '''
    Prints pre as code-block directive.
    '''
    print('.. code-block:: none')
    print('')
    for line in tag.text.splitlines():
        print('  %s' % line.strip().encode('utf-8'))
    print('')


def parse_block(tag):
    '''
    Parses block tag.

    Supported tags are h2 - h5, p, ul, ol, dl and pre, the reST output is
    printed to sys.stdout. For h2 the current sys.stdout is closed and
    replaced by newly created file "<id>.rst", so everything following
    the heading ends up in that file.

    Raises Exception('Unknown tag') for any other tag.
    '''
    name = tag.name
    if name == 'h2':
        sys.stdout.close()
        sys.stdout = open('%s.rst' % tag.get('id'), 'w')
        _parse_heading(tag, '=')
    elif name == 'h3':
        _parse_heading(tag, '+')
    elif name in ('h4', 'h5'):
        _parse_heading(tag, '-', inline = True)
    elif name == 'p':
        _emit(textwrap.fill(format_content(tag)))
        print('')
    elif name in ('ul', 'ol'):
        _parse_list(tag)
    elif name == 'dl':
        _parse_definition_list(tag)
    elif name == 'pre':
        _parse_pre(tag)
    else:
        # Diagnostics go to stderr, stdout may be bound to a generated file
        sys.stderr.write('%s\n' % tag.name)
        sys.stderr.write('%s\n' % tag.attrs)
        raise Exception('Unknown tag')
# Load the HTML documentation given as first argument; the handle is
# closed as soon as the content is read.
# NOTE(review): no parser is named, so bs4 uses whichever it finds
# installed - confirm the output does not depend on it.
with open(sys.argv[1]) as handle:
    s = BeautifulSoup(handle.read())

body = s.html.body.find(id = 'body')
if body is None:
    raise Exception('Element with id="body" not found in %s' % sys.argv[1])

# Convert every block found directly inside the container
for tag in body:

    # skip comments
    if isinstance(tag, Comment):
        continue

    # skip empty
    if isinstance(tag, NavigableString) and tag.string.strip() == '':
        continue

    parse_block(tag)