From 7753b026e8cfba01e5da88ffaf228586fe1e0807 Mon Sep 17 00:00:00 2001 From: milde Date: Wed, 14 Dec 2011 23:53:38 +0000 Subject: [PATCH] Clean up record_dependencies feature. Use utf8 encoding for the record file: simple, failsafe and reproducible way for portable storage of non-ASCII filenames (cf. http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html). Drop latex2e exception: Record only files required to generate the LaTeX source. git-svn-id: https://docutils.svn.sourceforge.net/svnroot/docutils/trunk/docutils@7256 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- HISTORY.txt | 5 +- docs/user/config.txt | 35 +++-- docutils/parsers/rst/directives/images.py | 28 ++-- docutils/parsers/rst/directives/misc.py | 4 +- docutils/utils.py | 10 +- docutils/writers/html4css1/__init__.py | 34 +++-- docutils/writers/latex2e/__init__.py | 5 +- test/data/dependencies.txt | 23 +++- test/test_dependencies.py | 215 +++++++++++++++++------------- 9 files changed, 212 insertions(+), 147 deletions(-) rewrite test/test_dependencies.py (71%) diff --git a/HISTORY.txt b/HISTORY.txt index 18e705ef4..6ba0dd3ac 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -32,8 +32,8 @@ Changes Since 0.8.1 * docutils/utils.py - - DependencyList uses io.FileOutput to prevent errors recording - non-ASCII filenames (fixes [ 3434355 ]. + - DependencyList uses io.FileOutput and 'utf8' encoding to prevent + errors recording non-ASCII filenames (fixes [ 3434355 ]. * docutils/parsers/rst/states.py @@ -48,6 +48,7 @@ Changes Since 0.8.1 * docutils/writers/latex2e/__init__.py - Support the `abbreviation` and `acronym` standard roles. + - Record only files required to generate the LaTeX source as dependencies. * docutils/writers/html4css1/__init__.py diff --git a/docs/user/config.txt b/docs/user/config.txt index 3d1894b10..196bc09c6 100644 --- a/docs/user/config.txt +++ b/docs/user/config.txt @@ -387,14 +387,24 @@ _`output_encoding_error_handler` --output-encoding, -o``. 
_`record_dependencies` - Path to a file where Docutils will write a list of files that the - input and output depend on [#dependencies]_, e.g. due to file - inclusion. [#pwd]_ The format is one filename per line. This - option is particularly useful in conjunction with programs like - ``make``. + Path to a file where Docutils will write a list of files that were + required to generate the output, e.g. included files or embedded + stylesheets [#dependencies]_. [#pwd]_ The format is one path per + line with forward slashes as separator, the encoding is ``utf8``. Set to ``-`` in order to write dependencies to stdout. + This option is particularly useful in conjunction with programs like + ``make`` using ``Makefile`` rules like:: + + ham.html: ham.txt $(shell cat hamdeps.txt) + rst2html.py --record-dependencies=hamdeps.txt ham.txt ham.html + + If the filesystem encoding differs from utf8, replace the ``cat`` + command with a call to a converter, e.g.:: + + $(shell iconv -f utf8 -t latin1 hamdeps.txt) + Default: None. Option: ``--record-dependencies``. _`report_level` @@ -1436,19 +1446,8 @@ _`_source` do the overriding explicitly, by assigning ``None`` to the other settings. -.. [#dependencies] Some notes on the dependency recorder: - - * Images are only added to the dependency list if the - reStructuredText parser extracted image dimensions from the file. - - * Stylesheets are only added if they are embedded. - - * For practical reasons, the output of the LaTeX writer is - considered merely an *intermediate* processing stage. The - dependency recorder records all files the *rendered* file - (e.g. in PDF or DVI format) depends on. Thus, images and - stylesheets are both unconditionally recorded as dependencies - when using the LaTeX writer. +.. [#dependencies] Images are only added to the dependency list if the + reStructuredText parser extracted image dimensions from the file. .. 
[#footnote_space] The footnote space is trimmed if the reference style is "superscript", and it is left if the reference style is diff --git a/docutils/parsers/rst/directives/images.py b/docutils/parsers/rst/directives/images.py index 7ac9d7768..7adda2edf 100644 --- a/docutils/parsers/rst/directives/images.py +++ b/docutils/parsers/rst/directives/images.py @@ -10,17 +10,21 @@ __docformat__ = 'reStructuredText' import sys +import urllib from docutils import nodes, utils from docutils.parsers.rst import Directive from docutils.parsers.rst import directives, states from docutils.nodes import fully_normalize_name, whitespace_normalize_name from docutils.parsers.rst.roles import set_classes - -try: - import Image as PIL # PIL +try: # check for the Python Imaging Library + import PIL except ImportError: - PIL = None - + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None class Image(Directive): @@ -121,15 +125,17 @@ class Figure(Image): figure_node = nodes.figure('', image_node) if figwidth == 'image': if PIL and self.state.document.settings.file_insertion_enabled: - # PIL doesn't like Unicode paths: + imagepath = urllib.url2pathname(image_node['uri']) try: - i = PIL.open(str(image_node['uri'])) - except (IOError, UnicodeError): - pass + img = PIL.Image.open( + imagepath.encode(sys.getfilesystemencoding())) + except (IOError, UnicodeEncodeError): + pass # TODO: warn? 
else: self.state.document.settings.record_dependencies.add( - image_node['uri']) - figure_node['width'] = i.size[0] + imagepath.replace('\\', '/')) + figure_node['width'] = img.size[0] + del img elif figwidth is not None: figure_node['width'] = figwidth if figclasses: diff --git a/docutils/parsers/rst/directives/misc.py b/docutils/parsers/rst/directives/misc.py index 8641b24e8..3c89ce8bb 100644 --- a/docutils/parsers/rst/directives/misc.py +++ b/docutils/parsers/rst/directives/misc.py @@ -198,12 +198,14 @@ class Raw(Directive): self.options['file'])) path = utils.relative_path(None, path) try: - self.state.document.settings.record_dependencies.add(path) raw_file = io.FileInput( source_path=path, encoding=encoding, error_handler=(self.state.document.settings.\ input_encoding_error_handler), handle_io_errors=None) + # TODO: currently, raw input files are recorded as + # dependencies even if not used for the chosen output format. + self.state.document.settings.record_dependencies.add(path) except IOError, error: raise self.severe(u'Problems with "%s" directive path:\n%s.' % (self.name, ErrorString(error))) diff --git a/docutils/utils.py b/docutils/utils.py index 98bed5606..ed3abfd21 100644 --- a/docutils/utils.py +++ b/docutils/utils.py @@ -662,7 +662,7 @@ def normalize_language_tag(tag): return taglist -class DependencyList: +class DependencyList(object): """ List of dependencies, with file recording support. 
@@ -699,9 +699,7 @@ class DependencyList: else: of = output_file self.file = FileOutput(destination_path=of, - encoding=sys.getfilesystemencoding(), - error_handler='xmlcharrefreplace', - autoclose=False) + encoding='utf8', autoclose=False) else: self.file = None @@ -725,8 +723,8 @@ class DependencyList: self.file = None def __repr__(self): - if self.file: + try: output_file = self.file.name - else: + except AttributeError: output_file = None return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list) diff --git a/docutils/writers/html4css1/__init__.py b/docutils/writers/html4css1/__init__.py index 4daa74fbb..ec1edfb82 100644 --- a/docutils/writers/html4css1/__init__.py +++ b/docutils/writers/html4css1/__init__.py @@ -19,10 +19,16 @@ import os import os.path import time import re -try: - import Image # check for the Python Imaging Library +import urllib +try: # check for the Python Imaging Library + import PIL except ImportError: - Image = None + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None import docutils from docutils import frontend, nodes, utils, writers, languages, io from docutils.transforms import writer_aux @@ -286,10 +292,10 @@ class HTMLTranslator(nodes.NodeVisitor): styles = [utils.relative_path(settings._destination, sheet) for sheet in styles] if settings.embed_stylesheet: - settings.record_dependencies.add(*styles) self.stylesheet = [self.embedded_stylesheet % io.FileInput(source_path=sheet, encoding='utf-8').read() for sheet in styles] + settings.record_dependencies.add(*styles) else: # link to stylesheets self.stylesheet = [self.stylesheet_link % self.encode(stylesheet) for stylesheet in styles] @@ -1006,18 +1012,22 @@ class HTMLTranslator(nodes.NodeVisitor): if 'height' in node: atts['height'] = node['height'] if 'scale' in node: - if Image and not ('width' in node and 'height' in node): + if (PIL and not ('width' 
in node and 'height' in node) + and self.settings.file_insertion_enabled): + imagepath = urllib.url2pathname(uri) try: - im = Image.open(str(uri)) - except (IOError, # Source image can't be found or opened - UnicodeError): # PIL doesn't like Unicode paths. - pass + img = PIL.Image.open( + imagepath.encode(sys.getfilesystemencoding())) + except (IOError, UnicodeEncodeError): + pass # TODO: warn? else: + self.settings.record_dependencies.add( + imagepath.replace('\\', '/')) if 'width' not in atts: - atts['width'] = str(im.size[0]) + atts['width'] = str(img.size[0]) if 'height' not in atts: - atts['height'] = str(im.size[1]) - del im + atts['height'] = str(img.size[1]) + del img for att_name in 'width', 'height': if att_name in atts: match = re.match(r'([0-9.]+)(\S*)$', atts[att_name]) diff --git a/docutils/writers/latex2e/__init__.py b/docutils/writers/latex2e/__init__.py index 8ff974cfe..c21f5016f 100644 --- a/docutils/writers/latex2e/__init__.py +++ b/docutils/writers/latex2e/__init__.py @@ -1284,6 +1284,8 @@ class LaTeXTranslator(nodes.NodeVisitor): # Unicode chars that are not recognized by LaTeX's utf8 encoding unsupported_unicode_chars = { 0x00A0: ur'~', # NO-BREAK SPACE + # TODO: ensure white space also at the beginning of a line? 
+ # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~' 0x00AD: ur'\-', # SOFT HYPHEN # 0x2008: ur'\,', # PUNCTUATION SPACE    @@ -2225,9 +2227,8 @@ class LaTeXTranslator(nodes.NodeVisitor): def visit_image(self, node): self.requirements['graphicx'] = self.graphicx_package attrs = node.attributes - # Convert image URI to a local file path and add to dependency list + # Convert image URI to a local file path imagepath = urllib.url2pathname(attrs['uri']).replace('\\', '/') - self.settings.record_dependencies.add(imagepath) # alignment defaults: if not 'align' in attrs: # Set default align of image in a figure to 'center' diff --git a/test/data/dependencies.txt b/test/data/dependencies.txt index cbe65e3bf..cf1660dcd 100644 --- a/test/data/dependencies.txt +++ b/test/data/dependencies.txt @@ -1,10 +1,29 @@ -.. image:: some_image.png +Test input for test_dependencies. + +Docutils can write a list of files required to generate the output like +included files or embedded stylesheets. This is particularly useful in +conjunction with programs like ``make``. + +Included files are recorded: .. include:: include.txt .. raw:: HTML :file: raw.txt +Dependencies are recorded only once: + .. include:: include.txt -.. image:: картина.jpg +Image files are only recorded, if actually accessed +(to extract the size or if embedded in the output document): + +.. image:: test.jpg + +.. figure:: ../docs/user/rst/images/title.png + :figwidth: image + +Scaled images without given size are recorded by the html writer: + +.. image:: ../docs/user/rst/images/biohazard.png + :scale: 50 % diff --git a/test/test_dependencies.py b/test/test_dependencies.py dissimilarity index 71% index 48a763e00..611fba85c 100755 --- a/test/test_dependencies.py +++ b/test/test_dependencies.py @@ -1,93 +1,122 @@ -#! /usr/bin/env python - -# $Id$ -# Author: Lea Wiemann -# Copyright: This module has been placed in the public domain. - -""" -Test module for the --record-dependencies option. 
-""" - -import os.path -import unittest -import sys -import DocutilsTestSupport # must be imported before docutils -import docutils.core -import docutils.utils -import docutils.io - - -class RecordDependenciesTests(unittest.TestCase): - - # docutils.utils.DependencyList records relative URLs, not platform paths, - # so use "/" as a path separator even on Windows (not os.path.join). - - def get_record(self, **settings): - recordfile = 'record.txt' - settings.setdefault('source_path', - os.path.join('data', 'dependencies.txt')) - settings.setdefault('settings_overrides', {}) - settings['settings_overrides'] = settings['settings_overrides'].copy() - settings['settings_overrides']['_disable_config'] = 1 - if 'record_dependencies' not in settings['settings_overrides']: - settings['settings_overrides']['record_dependencies'] = \ - docutils.utils.DependencyList(recordfile) - docutils.core.publish_file( - destination=DocutilsTestSupport.DevNull(), **settings) - settings['settings_overrides']['record_dependencies'].close() - record = docutils.io.FileInput(source_path=recordfile, - encoding='utf8') - return record.read().splitlines() - - def test_dependencies(self): - self.assertEqual(self.get_record(), - ['data/include.txt', 'data/raw.txt']) - self.assertEqual(self.get_record(writer_name='latex'), - ['data/include.txt', - 'data/raw.txt', - # this is a URL, not a path: - 'some_image.png', - # cyrillic filename (testing with an image, because - # this does not abort if the file does not exist): - u'\u043a\u0430\u0440\u0442\u0438\u043d\u0430.jpg']) - - def test_csv_dependencies(self): - try: - import csv - self.assertEqual( - self.get_record(source_path=os.path.join('data', - 'csv_dep.txt')), - ['data/csv_data.txt']) - except ImportError: - pass - - def test_stylesheet_dependencies(self): - # Parameters to publish_file. 
- s = {'settings_overrides': {}} - so = s['settings_overrides'] - so['embed_stylesheet'] = 0 - # must use '/', not os.sep or os.path.join, because of URL handling - # (see docutils.utils.relative_path): - stylesheet_path = 'data/stylesheet.txt' - so['stylesheet_path'] = stylesheet_path - so['stylesheet'] = None - s['writer_name'] = 'html' - record = self.get_record(**s) - self.assert_(stylesheet_path not in record, - '%r should not be in %r' % (stylesheet_path, record)) - so['embed_stylesheet'] = 1 - record = self.get_record(**s) - self.assert_(stylesheet_path in record, - '%r should be in %r' % (stylesheet_path, record)) - s['writer_name'] = 'latex' - record = self.get_record(**s) - self.assert_(stylesheet_path in record, - '%r should be in %r' % (stylesheet_path, record)) - del so['embed_stylesheet'] - record = self.get_record(**s) - self.assert_(stylesheet_path not in record, - '%r should not be in %r' % (stylesheet_path, record)) - - -if __name__ == '__main__': - unittest.main() +#! /usr/bin/env python + +# $Id$ +# Author: Lea Wiemann +# Copyright: This module has been placed in the public domain. + +""" +Test module for the --record-dependencies option. +""" + +import os.path +import unittest +import sys +import DocutilsTestSupport # must be imported before docutils +import docutils.core +import docutils.utils +import docutils.io +from docutils.parsers.rst.directives.images import PIL + +# docutils.utils.DependencyList records POSIX paths, +# i.e. "/" as a path separator even on Windows (not os.path.join). 
+paths = {'include': u'data/include.txt', # included rst file + 'raw': u'data/raw.txt', # included raw "HTML file" + 'scaled-image': u'../docs/user/rst/images/biohazard.png', + 'figure-image': u'../docs/user/rst/images/title.png', + 'stylesheet': u'data/stylesheet.txt', + 'default-stylesheet': u'../docutils/writers/html4css1/html4css1.css', + } + + +class RecordDependenciesTests(unittest.TestCase): + + def get_record(self, **settings): + recordfile = 'record.txt' + recorder = docutils.utils.DependencyList(recordfile) + # (Re) create the record file by running a conversion: + settings.setdefault('source_path', + os.path.join('data', 'dependencies.txt')) + settings.setdefault('settings_overrides', {}) + settings['settings_overrides'].update(_disable_config=True, + record_dependencies=recorder) + docutils.core.publish_file(destination=DocutilsTestSupport.DevNull(), + **settings) + recorder.close() + # Read the record file: + record = docutils.io.FileInput(source_path=recordfile, + encoding='utf8') + return record.read().splitlines() + + def test_dependencies(self): + # Note: currently, raw input files are read (and hence recorded) while + # parsing even if not used in the chosen output format. + # This should change (see parsers/rst/directives/misc.py). 
+ keys = ['include', 'raw'] + if PIL: + keys += ['figure-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='xml') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + + def test_dependencies_html(self): + keys = ['include', 'raw', 'default-stylesheet'] + if PIL: + keys += ['figure-image', 'scaled-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='html') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + + def test_dependencies_latex(self): + # since 0.9, the latex writer records only really accessed files, too + # Note: currently, raw input files are read (and hence recorded) while + # parsing even if not used in the chosen output format. + # This should change (see parsers/rst/directives/misc.py). + keys = ['include', 'raw'] + if PIL: + keys += ['figure-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='latex') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + + def test_csv_dependencies(self): + try: + import csv + csvsource = os.path.join('data', 'csv_dep.txt') + self.assertEqual(self.get_record(source_path=csvsource), + ['data/csv_data.txt']) + except ImportError: + pass + + def test_stylesheet_dependencies(self): + stylesheet = paths['stylesheet'] + so = {'stylesheet_path': paths['stylesheet'], + 'stylesheet': None} + + so['embed_stylesheet'] = False + record = self.get_record(writer_name='html', settings_overrides=so) + self.assert_(stylesheet not in record, + '%r should not be in %r' % (stylesheet, record)) + record = self.get_record(writer_name='latex', settings_overrides=so) + self.assert_(stylesheet not in record, + '%r should not be in %r' % (stylesheet, record)) + + so['embed_stylesheet'] = True + record = self.get_record(writer_name='html', 
settings_overrides=so) + self.assert_(stylesheet in record, + '%r should be in %r' % (stylesheet, record)) + record = self.get_record(writer_name='latex', settings_overrides=so) + self.assert_(stylesheet in record, + '%r should be in %r' % (stylesheet, record)) + + +if __name__ == '__main__': + unittest.main() -- 2.11.4.GIT