3 # Released under the terms of the GPLv3
11 from subprocess
import Popen
, PIPE
13 from .constants
import USER_AGENT
14 from .readme
import README
# lynxDump(url, lynxArgs): run the external `lynx` program in -dump mode and
# return a 3-tuple (dumped, linkUrls, lynxErr).
# NOTE(review): this view of the function has gaps (several original lines are
# not shown), so comments below describe only the statements that are visible.
# NOTE(review): `lynxArgs=[]` is a mutable default argument; it is shared
# across calls if anything ever mutates it. Prefer `None` + a fresh list.
16 def lynxDump(url
, lynxArgs
=[]):
# Special-case the pseudo-URL "special:README". The body of this branch is not
# visible here; presumably it returns the imported README text -- TODO confirm.
17 if url
== "special:README":
# Launch lynx with stdout and stderr captured through pipes and no stdin.
# The remainder of the argv list (after the `+`) is not visible here;
# presumably it appends lynxArgs and the url -- TODO confirm.
# NOTE(review): argv is given as a list, so unless shell=True is passed in the
# part not shown, the double quotes inside '-useragent="..."' reach lynx
# literally and would become part of the user agent string -- verify intent.
21 p
= Popen(['lynx', '-dump', '-force_html', '-useragent="%s via lynx"' % USER_AGENT
] +
23 stdin
=None, stdout
=PIPE
, stderr
=PIPE
)
# Keep handles to the child's output streams (binary file objects).
24 (lynxStdout
, lynxErrout
) = (p
.stdout
, p
.stderr
)
# Failure result: empty text, no links, and an error message. The condition
# guarding this return is not visible; presumably an exception handler around
# the Popen call -- TODO confirm.
26 return "", [], "Fatal error - lynx execution failed. Is it installed?"
28 # TODO: work out the encoding somehow? For now we assume it's latin1...
# Wrap lynx's stdout so that bytes read from it are treated as latin1 and
# handed back re-encoded as utf8, with 'replace' error handling.
# NOTE(review): no `import codecs` (or `import re`, used below) is visible in
# this view of the file -- confirm they are imported.
29 lynxDecoded
= codecs
.EncodedFile(lynxStdout
, 'utf8', 'latin1', 'replace')
# Walk the recoded output line by line; each item is still bytes here.
35 for binaryline
in lynxDecoded
:
# decode() returns a (text, length) pair; keep only the text.
36 line
= lynxDecoded
.decode(binaryline
)[0]
# A line consisting solely of "References" is tested for here; what happens on
# a match is only partly visible. Presumably it marks the start of lynx's
# link list -- TODO confirm.
37 if line
== 'References\n':
39 # The previous matched 'References' was part of the
# Match a numbered entry such as "  12. <text>" and capture the text after
# the "N. " prefix.
47 m
= re
.match(r
'\s*\d+\. (.*)\n', line
)
# Append the captured text to the list of link URLs.
# NOTE(review): no None-check on `m` is visible here; re.match returns None on
# a non-matching line -- confirm a guard exists in the lines not shown.
49 linkUrls
+= [m
.groups()[0]]
# Read everything lynx wrote to stderr and decode it as utf8, replacing
# undecodable bytes.
# NOTE(review): no p.wait()/communicate() is visible, and stderr is read only
# after stdout has been consumed -- check for zombie processes and for a
# possible stall if lynx fills the stderr pipe first.
55 lynxErr
= lynxErrout
.read().decode('utf8', 'replace')
# Result: (page text, list of link URLs, stderr text). The initialisation of
# `dumped` and `linkUrls` is not visible in this view.
56 return dumped
, linkUrls
, lynxErr