1 from bs4
import BeautifulSoup
, NavigableString
, Comment
# Matches runs of spaces, carriage returns and newlines so that inline text
# can have each run collapsed to a single space.
whitespace = re.compile('[ \r\n]+')
def get_id_from_cfg(text):
    '''
    Formats anchor ID from config option.

    A leading ``$cfg['`` and a trailing ``']`` are removed when present,
    every ``[$i]`` index placeholder is dropped, and the remaining key
    parts (separated by ``']['``) are joined with underscores behind a
    ``cfg_`` prefix. For example ``$cfg['Servers'][$i]['host']`` becomes
    ``cfg_Servers_host``.
    '''
    prefix = "$cfg['"
    suffix = "']"
    if text.startswith(prefix):
        text = text[len(prefix):]
    if text.endswith(suffix):
        text = text[:-len(suffix)]
    # Drop the per-server index placeholder before splitting so that the
    # neighbouring keys end up separated by a single "']['" delimiter.
    text = text.replace('[$i]', '')
    parts = text.split("']['")
    return 'cfg_%s' % '_'.join(parts)
20 def format_content(tag
, ignore_links
= False, skip
= (), document_mode
= False):
22 Parses inline html content.
26 if isinstance(item
, NavigableString
):
27 text
= whitespace
.sub(' ', item
.string
)
29 out
.append(text
.replace('*', '\\*').replace('_', '\\_'))
33 if isinstance(item
, Comment
):
36 # skip breaks, they are mostly invalid anyway
41 if item
.name
== 'img':
47 if item
.name
== 'a' and 'href' in item
.attrs
:
48 content
= format_content(item
)
52 href
= item
.attrs
['href']
54 if content
== 'details' or 'see' in content
:
55 out
.append('see :ref:`%s`' % href
[1:])
56 elif 'FAQ' in content
:
57 out
.append(':ref:`%s`' % href
[1:])
59 out
.append(':ref:`%s`' % href
[1:])
61 out
.append('`%s <%s>`_' % (content
, href
))
63 if item
.name
== 'code':
64 out
.append('``%s``' % format_content(item
))
66 if item
.name
== 'strong' or (item
.name
== 'span' and 'class' in item
.attrs
and 'important' in item
.attrs
['class']):
67 out
.append('**%s**' % format_content(item
))
70 out
.append('*%s*' % format_content(item
))
72 if item
.name
== 'abbr':
73 out
.append(':abbr:`%s (%s)`' % (format_content(item
), item
.attrs
['title']))
75 if item
.name
== 'sup':
76 out
.append(':sup:`%s`' % format_content(item
))
78 if item
.name
== 'sub':
79 out
.append(':sub:`%s`' % format_content(item
))
81 if item
.name
== 'span':
82 out
.append(format_content(item
))
86 print textwrap
.fill(''.join(out
).strip()).encode('utf-8')
94 raise Exception('Unknown tag')
96 print textwrap
.fill(''.join(out
).strip()).encode('utf-8')
102 tagid
= tag
.get('id')
103 if tagid
is not None:
104 print '.. _%s:' % tagid
107 def parse_block(tag
):
113 sys
.stdout
= open('%s.rst' % tag
.get('id'), 'w')
116 print '=' * len(tag
.text
)
118 elif tag
.name
== 'h3':
121 print '+' * len(tag
.text
)
123 elif tag
.name
in ('h4', 'h5'):
125 text
= format_content(tag
, True)
126 print text
.encode('utf-8')
127 print '-' * len(text
)
129 elif tag
.name
== 'p':
130 text
= format_content(tag
)
131 print textwrap
.fill(text
).encode('utf-8')
133 elif tag
.name
in ('ul', 'ol'):
140 if isinstance(li
, NavigableString
) and li
.string
.strip() == '':
144 if isinstance(li
, Comment
):
148 raise Exception('UL contains %s' % li
.name
)
149 text
= format_content(li
, skip
= ('ul', 'li', 'pre', 'p'))
151 indent
= ' ' * (len(header
) + 1)
152 joiner
= '\n%s' % indent
153 print joiner
.join(textwrap
.wrap(text
)).encode('utf-8')
155 if isinstance(item
, NavigableString
):
156 # Already handle above
158 if item
.name
== 'ul':
161 if isinstance(lii
, NavigableString
) and lii
.string
.strip() == '':
164 raise Exception('UL contains %s' % lii
.name
)
165 text
= format_content(lii
)
167 joiner
= '\n%s ' % indent
168 print joiner
.join(textwrap
.wrap(text
)).encode('utf-8')
170 elif item
.name
== 'pre':
172 print indent
+ '.. code-block:: none'
174 for line
in item
.text
.splitlines():
175 print indent
+ ' ', line
.strip().encode('utf-8')
179 elif item
.name
== 'p':
180 text
= format_content(item
)
181 print textwrap
.fill(text
, initial_indent
= indent
).encode('utf-8')
185 elif tag
.name
== 'dl':
189 if isinstance(li
, NavigableString
) and li
.string
.strip() == '':
193 if isinstance(li
, Comment
):
198 cfg
= dt_id
is not None and ('cfg' in dt_id
or 'servers' in dt_id
or 'control' in dt_id
or 'bookmark' in dt_id
or 'table' in dt_id
or 'pmadb' in dt_id
or 'relation' in dt_id
or 'col_com' in dt_id
or 'history' in dt_id
or 'recent' in dt_id
or 'tracking' in dt_id
or 'designer' in dt_id
or 'Arbitrary' in dt_id
or 'userconfig' in dt_id
)
203 if not isinstance(subtag
, NavigableString
) and subtag
.get('id') is not None:
204 ids
.append(subtag
.get('id'))
209 if not isinstance(subtag
, NavigableString
) and subtag
.get('id') is not None:
216 if isinstance(subtag
, NavigableString
):
217 text
+= subtag
.string
218 elif subtag
.name
== 'span':
220 elif subtag
.name
== 'br':
227 for option
in options
:
228 if option
.strip() == '':
231 optname
, opttype
= option
.split(' ', 1)
235 optname
= optname
.strip()
236 opttype
= opttype
.strip()
237 config_options
.append((optname
, opttype
))
238 newid
= get_id_from_cfg(optname
)
243 print '.. _%s:' % anchor
245 for optname
, opttype
in config_options
:
246 print '.. config:option:: %s' % optname
248 print ' :type: %s' % opttype
252 text
= format_content(li
).encode('utf-8')
254 print '-' * len(text
)
256 elif li
.name
== 'dd':
257 format_content(li
, document_mode
= True)
261 raise Exception('Unknown tag')
262 elif tag
.name
== 'pre':
263 print '.. code-block:: none'
265 for line
in tag
.text
.splitlines():
266 print ' ', line
.strip().encode('utf-8')
271 raise Exception('Unknown tag')
274 s
= BeautifulSoup(file(sys
.argv
[1]).read())
276 for tag
in s
.html
.body
.find(id = 'body'):
279 if isinstance(tag
, NavigableString
) and tag
.string
.strip() == '':
283 if isinstance(tag
, Comment
):