From df87b1e2fd181627064ee8ba6a3132d0cc5233d2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andr=C3=A9=20Wobst?= Date: Mon, 1 Sep 2014 19:11:52 +0000 Subject: [PATCH] add chroot config option needed to use a chrooted TeX installation git-svn-id: http://svn.code.sf.net/p/pyx/code/trunk/pyx@3606 a4f5e268-e194-4f32-bce1-d30804cbbcc5 --- CHANGES | 3 ++- manual/text.rst | 35 ++++++++++++++++++++++++++++------- pyx/text.py | 47 +++++++++++++++++++++++++++++++---------------- 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/CHANGES b/CHANGES index 852870aa..0ab0bc3d 100644 --- a/CHANGES +++ b/CHANGES @@ -4,9 +4,10 @@ XXX - color module: - fix grey class - epsfile module: - - fix parsing of bounding box + - fix parsing of bounding box - text module: - no end of pages test when no dvi is created at all + - add chroot config option needed to use a chrooted TeX installation 0.13 (2013/12/20): - Requires at least Python 3.2 diff --git a/manual/text.rst b/manual/text.rst index a568e9ef..1705b5d5 100644 --- a/manual/text.rst +++ b/manual/text.rst @@ -20,13 +20,13 @@ of the whole machinery. PyX does not apply any limitations on the text submitted by the user. Instead the text is directly passed to TeX. This has the implication, that the text to -be typeset should come from a trusted source or some security measures should -have been applied already. PyX just adds a light and transparent wrapper using -basic TeX functionality for later identification and output extraction. This -procedure enables full access to all TeX features and makes PyX on the other -hand dependent on the error handling provided by TeX. However, a detailed and -immediate control of the TeX output allows PyX to report problems back to the -user as they occur. +be typeset should come from a trusted source or some special security measures +should be applied (see :ref:`chroot`). PyX just adds a light and transparent +wrapper using basic TeX functionality for later identification and output +extraction. 
This procedure enables full access to all TeX features and makes +PyX on the other hand dependent on the error handling provided by TeX. However, +a detailed and immediate control of the TeX output allows PyX to report +problems back to the user as they occur. While we only talked about TeX so far (and will continue to do so in the rest of this section), it is important to note that the coupling is not limited to @@ -529,6 +529,8 @@ moment: .. _config: +.. _pyxrc: + Configuration ============= @@ -628,6 +630,21 @@ Python program: .. autofunction:: pyxinfo +.. _chroot: + +Typesetting insecure text +------------------------- + +When text is typeset, it is passed to a TeX interpreter unchanged\ [#]_. This +is a security problem if the text does not come from a trusted source. While +full access to all typesetting features is not considered a problem, you should +bear in mind that TeX code can be used to read data from any other file +accessible to the TeX process. To reliably prevent this process from accessing +any other data unrelated to the TeX installation, you can set up a chroot +environment for the TeX interpreter and configure PyX to use it. This can be +achieved by setting the ``chroot`` option and adjusting the TeX interpreter +call and the ``filelocator`` configuration in the ``pyxrc``. + .. rubric:: Footnotes .. [#] https://en.wikipedia.org/wiki/TeX @@ -638,3 +655,7 @@ Python program: paragraph. But be sure that the ``pyxgraphics`` keyword argument is always set! +.. [#] The text is actually passed as an argument of a TeX command defined by + PyX, but this is a minor detail and has no effect regarding possible + attacks. 
+ diff --git a/pyx/text.py b/pyx/text.py index 0712e74a..3a27b4df 100644 --- a/pyx/text.py +++ b/pyx/text.py @@ -293,13 +293,13 @@ class texmessage: r = remove_nested_brackets(msg) r, m = remove_pattern(texmessage.quoted_file_pattern, r) while m: - if not os.path.isfile(m.group("filename")): + if not os.path.isfile(config.get("text", "chroot", "") + m.group("filename")): return msg r, m = remove_pattern(texmessage.quoted_file_pattern, r) r, m = remove_pattern(texmessage.file_pattern, r, ignore_nl=False) while m: for filename in itertools.accumulate(m.group("filename").split("\n")): - if os.path.isfile(filename): + if os.path.isfile(config.get("text", "chroot", "") + filename): break else: return msg @@ -316,7 +316,7 @@ class texmessage: for p in [texmessage.quoted_def_pattern, texmessage.def_pattern]: r, m = remove_pattern(p, r) while m: - if not os.path.isfile(m.group("filename")): + if not os.path.isfile(config.get("text", "chroot", "") + m.group("filename")): return msg r, m = remove_pattern(texmessage.quoted_file_pattern, r) return r @@ -331,7 +331,7 @@ class texmessage: for p in [texmessage.quoted_graphics_pattern, texmessage.graphics_pattern]: r, m = remove_pattern(p, r) while m: - if not os.path.isfile(m.group("filename")): + if not os.path.isfile(config.get("text", "chroot", "") + m.group("filename")): return msg r, m = remove_pattern(texmessage.quoted_file_pattern, r) return r @@ -938,7 +938,7 @@ class SingleRunner: texmessages_run_default = [texmessage.font_warning, texmessage.box_warning, texmessage.package_warning, texmessage.load_def, texmessage.load_graphics] - def __init__(self, executable, + def __init__(self, cmd, texenc="ascii", usefiles=[], texipc=config.getboolean("text", "texipc", 0), @@ -953,11 +953,12 @@ class SingleRunner: .. note:: This class cannot be used directly. It is the base class for all texrunners and provides most of the implementation. 
- Still, to the end user the parameters except for *executable* + Still, to the end user the parameters except for *cmd* are important, as they are preserved in derived classes usually. - :param str executable: command to start the TeX interpreter + :param cmd: command and arguments to start the TeX interpreter + :type cmd: list of str :param str texenc: encoding to use in the communication with the TeX interpreter :param usefiles: list of supplementary files to be copied to and from @@ -984,7 +985,7 @@ class SingleRunner: :type texmessages_run: list of :class:`texmessage` parsers """ - self.executable = executable + self.cmd = cmd self.texenc = texenc self.usefiles = usefiles self.texipc = texipc @@ -1125,7 +1126,14 @@ class SingleRunner: assert self.state == STATE_START self.state = STATE_PREAMBLE - self.tmpdir = tempfile.mkdtemp() + chroot = config.get("text", "chroot", "") + if chroot: + chroot_tmpdir = config.get("text", "tmpdir", "/tmp") + chroot_tmpdir_rel = os.path.relpath(chroot_tmpdir, os.sep) + base_tmpdir = os.path.join(chroot, chroot_tmpdir_rel) + else: + base_tmpdir = config.get("text", "tmpdir", None) + self.tmpdir = tempfile.mkdtemp(prefix="pyx", dir=base_tmpdir) atexit.register(self._cleanup) for usefile in self.usefiles: extpos = usefile.rfind(".") @@ -1133,7 +1141,11 @@ class SingleRunner: os.rename(usefile, os.path.join(self.tmpdir, "texput" + usefile[extpos:])) except OSError: pass - cmd = [self.executable, '--output-directory', self.tmpdir] + if chroot: + tex_tmpdir = os.sep + os.path.relpath(self.tmpdir, chroot) + else: + tex_tmpdir = self.tmpdir + cmd = self.cmd + ['--output-directory', tex_tmpdir] if self.texipc: cmd.append("--ipc") self.popen = config.Popen(cmd, stdin=config.PIPE, stdout=config.PIPE, stderr=config.STDOUT, bufsize=0) @@ -1289,12 +1301,13 @@ class SingleRunner: class SingleTexRunner(SingleRunner): - def __init__(self, executable=config.get("text", "tex", "tex"), lfs="10pt", **kwargs): + def __init__(self, 
cmd=config.getlist("text", "tex", ["tex"]), lfs="10pt", **kwargs): """Plain TeX interface. This class adjusts the :class:`SingleRunner` to use plain TeX. - :param str executable: command to start the TeX interpreter + :param cmd: command and arguments to start the TeX interpreter + :type cmd: list of str :param lfs: resemble LaTeX font settings within plain TeX by loading a lfs-file :type lfs: str or None @@ -1308,7 +1321,7 @@ class SingleTexRunner(SingleRunner): options, and style files). """ - super().__init__(executable=executable, **kwargs) + super().__init__(cmd=cmd, **kwargs) self.lfs = lfs self.name = "TeX" @@ -1341,14 +1354,16 @@ class SingleLatexRunner(SingleRunner): #: default :class:`texmessage` parsers at ``\begin{document}`` texmessages_begindoc_default = [texmessage.load, texmessage.no_aux] - def __init__(self, executable=config.get("text", "latex", "latex"), + def __init__(self, cmd=config.getlist("text", "latex", ["latex"]), docclass="article", docopt=None, pyxgraphics=True, texmessages_docclass=[], texmessages_begindoc=[], **kwargs): """LaTeX interface. This class adjusts the :class:`SingleRunner` to use LaTeX. - :param str executable: command to start the TeX interpreter + :param cmd: command and arguments to start the TeX interpreter + in LaTeX mode + :type cmd: list of str :param str docclass: document class :param docopt: document loading options :type docopt: str or None @@ -1363,7 +1378,7 @@ class SingleLatexRunner(SingleRunner): :param kwargs: additional arguments passed to :class:`SingleRunner` """ - super().__init__(executable=executable, **kwargs) + super().__init__(cmd=cmd, **kwargs) self.docclass = docclass self.docopt = docopt self.pyxgraphics = pyxgraphics -- 2.11.4.GIT