From ac2138a3cc8f0300d5bcf84d2b0adf640307aee5 Mon Sep 17 00:00:00 2001 From: dkuhlman Date: Tue, 16 May 2017 21:07:07 +0000 Subject: [PATCH] Fixes to language/region and image size control git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8069 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/docutils/writers/odf_odt/__init__.py | 288 +++++++++++++++++++++----- docutils/test/test_writers/test_odt.py | 2 + 2 files changed, 238 insertions(+), 52 deletions(-) diff --git a/docutils/docutils/writers/odf_odt/__init__.py b/docutils/docutils/writers/odf_odt/__init__.py index 74e28ca12..286f6b330 100644 --- a/docutils/docutils/writers/odf_odt/__init__.py +++ b/docutils/docutils/writers/odf_odt/__init__.py @@ -24,6 +24,7 @@ import StringIO import copy import urllib2 import docutils +import locale from docutils import frontend, nodes, utils, writers, languages from docutils.readers import standalone from docutils.transforms import references @@ -569,6 +570,48 @@ class Writer(writers.Writer): s1 = self.create_meta() self.write_zip_str(zfile, 'meta.xml', s1) s1 = self.get_stylesheet() + # Set default language in document to be generated. + # Language is specified by the -l/--language command line option. + # Allowed values are "ll", "ll-rr" or "ll_rr", where ll is language + # and rr is region. If region is omitted, we use + # local.normalize(ll) to obtain a region. + language_code = None + region_code = None + if len(self.visitor.normalized_language_code) > 0: + language_ids = self.visitor.normalized_language_code[0].split('-') + if len(language_ids) == 2: + language_code = language_ids[0] + region_code = language_ids[1] + elif len(language_ids) == 1: + language_code = language_ids[0] + rcode = locale.normalize(language_code) + rcode = rcode.split('_') + if len(rcode) > 1: + rcode = rcode[1] + rcode = rcode.split('.') + if len(rcode) >= 1: + region_code = rcode[0] + if region_code is None: + raise RuntimeError( + 'invalid language-region. ' + 'Could not find region with locale.normalize(). ' + 'If language is supplied, then you must specify ' + 'both lanauge and region (ll-rr). Examples: ' + 'es-mx (Spanish, Mexico), en-au (English, Australia).') + else: + raise RuntimeError( + 'invalid language-region. ' + 'Format must be "ll-rr" or "ll_rr", where ll is language ' + 'and rr is region. ' + 'See https://en.wikipedia.org/wiki/IETF_language_tag') + # Update the style ElementTree with the language and region. + # Note that we keep a reference to the modified node because + # it is possible that ElementTree will throw away the Python + # representation of the updated node if we do not. + updated, new_dom_styles, updated_node = self.update_stylesheet( + self.visitor.get_dom_stylesheet(), language_code, region_code) + if updated: + s1 = etree.tostring(new_dom_styles) self.write_zip_str(zfile, 'styles.xml', s1) self.store_embedded_files(zfile) self.copy_from_stylesheet(zfile) @@ -580,7 +623,58 @@ class Writer(writers.Writer): self.parts['encoding'] = self.document.settings.output_encoding self.parts['version'] = docutils.__version__ - def write_zip_str(self, zfile, name, bytes, compress_type=zipfile.ZIP_DEFLATED): + def update_stylesheet(self, stylesheet_root, language_code, region_code): + """Update xml style sheet element with language and region/country.""" + updated = False + modified_nodes = set() + if language_code is not None or region_code is not None: + n1 = stylesheet_root.find( + '{urn:oasis:names:tc:opendocument:xmlns:office:1.0}' + 'styles') + if n1 is None: + raise RuntimeError( + "Cannot find 'styles' element in styles.odt/styles.xml") + n2_nodes = n1.findall( + '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}' + 'default-style') + if not n2_nodes: + raise RuntimeError( + "Cannot find 'default-style' " + "element in styles.xml") + for node in n2_nodes: + family = node.attrib.get( + '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}' + 'family') + if family == 'paragraph' or family == 'graphic': + n3 = node.find( + '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}' + 'text-properties') + if n3 is None: + raise RuntimeError( + "Cannot find 'text-properties' " + "element in styles.xml") + if language_code is not None: + n3.attrib[ + '{urn:oasis:names:tc:opendocument:xmlns:' + 'xsl-fo-compatible:1.0}language'] = language_code + n3.attrib[ + '{urn:oasis:names:tc:opendocument:xmlns:' + 'style:1.0}language-complex'] = language_code + updated = True + modified_nodes.add(n3) + if region_code is not None: + n3.attrib[ + '{urn:oasis:names:tc:opendocument:xmlns:' + 'xsl-fo-compatible:1.0}country'] = region_code + n3.attrib[ + '{urn:oasis:names:tc:opendocument:xmlns:' + 'style:1.0}country-complex'] = region_code + updated = True + modified_nodes.add(n3) + return updated, stylesheet_root, modified_nodes + + def write_zip_str( + self, zfile, name, bytes, compress_type=zipfile.ZIP_DEFLATED): localtime = time.localtime(time.time()) zinfo = zipfile.ZipInfo(name, localtime) # Add some standard UNIX file access permissions (-rw-r--r--). @@ -725,8 +819,8 @@ class Writer(writers.Writer): #s1 = doc.toprettyxml(' ') return s1 -# class ODFTranslator(nodes.SparseNodeVisitor): +# class ODFTranslator(nodes.SparseNodeVisitor): class ODFTranslator(nodes.GenericNodeVisitor): used_styles = ( @@ -784,15 +878,19 @@ class ODFTranslator(nodes.GenericNodeVisitor): 'lineblock5', 'lineblock6', 'image', 'figureframe', - ) + ) def __init__(self, document): #nodes.SparseNodeVisitor.__init__(self, document) nodes.GenericNodeVisitor.__init__(self, document) self.settings = document.settings - lcode = self.settings.language_code - self.language = languages.get_language(lcode, document.reporter) - self.format_map = { } + self.language_code = self.settings.language_code + self.language = languages.get_language( + self.language_code, + document.reporter) + self.normalized_language_code = languages.normalize_language_tag( + self.language_code) + self.format_map = {} if self.settings.odf_config_file: from ConfigParser import ConfigParser @@ -802,7 +900,7 @@ class ODFTranslator(nodes.GenericNodeVisitor): if rststyle not in self.used_styles: self.document.reporter.warning( 'Style "%s" is not a style used by odtwriter.' % ( - rststyle, )) + rststyle, )) self.format_map[rststyle] = format.decode('utf-8') self.section_level = 0 self.section_count = 0 @@ -982,18 +1080,18 @@ class ODFTranslator(nodes.GenericNodeVisitor): 'style:name': style_name, 'style:master-page-name': "rststyle-pagedefault", 'style:family': "paragraph", - }, nsdict=SNSD) + }, nsdict=SNSD) if current_style: el1.set('style:parent-style-name', current_style) el.set('text:style-name', style_name) - - def rststyle(self, name, parameters=( )): + def rststyle(self, name, parameters=()): """ Returns the style name to use for the given style. - If `parameters` is given `name` must contain a matching number of ``%`` and - is used as a format expression with `parameters` as the value. + If `parameters` is given `name` must contain a matching number of + ``%`` and is used as a format expression with `parameters` as + the value. """ name1 = name % parameters stylename = self.format_map.get(name1, 'rststyle-%s' % name1) @@ -1010,6 +1108,9 @@ class ODFTranslator(nodes.GenericNodeVisitor): new_content = etree.tostring(self.dom_stylesheet) return new_content + def get_dom_stylesheet(self): + return self.dom_stylesheet + def setup_paper(self, root_el): try: fin = os.popen("paperconf -s 2> /dev/null") @@ -2118,70 +2219,151 @@ class ODFTranslator(nodes.GenericNodeVisitor): def get_image_width_height(self, node, attr): size = None + unit = None if attr in node.attributes: size = node.attributes[attr] - unit = size[-2:] - if unit.isalpha(): - size = size[:-2] - else: - unit = 'px' + size = size.strip() + # For conversion factors, see: + # http://www.unitconversion.org/unit_converter/typography-ex.html try: - size = float(size) - except ValueError, e: + if size.endswith('%'): + if attr == 'height': + # Percentage allowed for width but not height. + raise ValueError('percentage not allowed for height') + size = size.rstrip(' %') + size = float(size) / 100.0 + unit = '%' + else: + size, unit = convert_to_cm(size) + except ValueError, exp: self.document.reporter.warning( - 'Invalid %s for image: "%s"' % ( - attr, node.attributes[attr])) - size = [size, unit] - return size + 'Invalid %s for image: "%s". ' + 'Error: "%s".' % ( + attr, node.attributes[attr], exp)) + return size, unit + + def convert_to_cm(self, size): + """Convert various units to centimeters. + + Note that a call to this method should be wrapped in: + try: except ValueError: + """ + size = size.strip() + if size.endswith('px'): + size = float(size[:-2]) * 0.026 # convert px to cm + elif size.endswith('in'): + size = float(size[:-2]) * 2.54 # convert in to cm + elif size.endswith('pt'): + size = float(size[:-2]) * 0.035 # convert pt to cm + elif size.endswith('pc'): + size = float(size[:-2]) * 2.371 # convert pc to cm + elif size.endswith('mm'): + size = float(size[:-2]) * 10.0 # convert mm to cm + elif size.endswith('cm'): + size = float(size[:-2]) + else: + raise ValueError('unknown unit type') + unit = 'cm' + return size, unit def get_image_scale(self, node): if 'scale' in node.attributes: + scale = node.attributes['scale'] try: - scale = int(node.attributes['scale']) - if scale < 1: # or scale > 100: - self.document.reporter.warning( - 'scale out of range (%s), using 1.' % (scale, )) - scale = 1 - scale = scale * 0.01 - except ValueError, e: + scale = int(scale) + except ValueError: self.document.reporter.warning( 'Invalid scale for image: "%s"' % ( node.attributes['scale'], )) + if scale < 1: # or scale > 100: + self.document.reporter.warning( + 'scale out of range (%s), using 1.' % (scale, )) + scale = 1 + scale = scale * 0.01 else: scale = 1.0 return scale def get_image_scaled_width_height(self, node, source): + """Return the image size in centimeters adjusted by image attrs.""" scale = self.get_image_scale(node) - width = self.get_image_width_height(node, 'width') - height = self.get_image_width_height(node, 'height') - + width, width_unit = self.get_image_width_height(node, 'width') + height, _ = self.get_image_width_height(node, 'height') dpi = (72, 72) if PIL is not None and source in self.image_dict: filename, destination = self.image_dict[source] imageobj = PIL.Image.open(filename, 'r') dpi = imageobj.info.get('dpi', dpi) # dpi information can be (xdpi, ydpi) or xydpi - try: iter(dpi) - except: dpi = (dpi, dpi) + try: + iter(dpi) + except: + dpi = (dpi, dpi) else: imageobj = None - if width is None or height is None: if imageobj is None: raise RuntimeError( 'image size not fully specified and PIL not installed') - if width is None: width = [imageobj.size[0], 'px'] - if height is None: height = [imageobj.size[1], 'px'] - - width[0] *= scale - height[0] *= scale - if width[1] == 'px': width = [width[0] / dpi[0], 'in'] - if height[1] == 'px': height = [height[0] / dpi[1], 'in'] - - width[0] = str(width[0]) - height[0] = str(height[0]) - return ''.join(width), ''.join(height) + if width is None: + width = imageobj.size[0] + width = float(width) * 0.026 # convert px to cm + if height is None: + height = imageobj.size[1] + height = float(height) * 0.026 # convert px to cm + if width_unit == '%': + factor = width + image_width = imageobj.size[0] + image_width = float(image_width) * 0.026 # convert px to cm + image_height = imageobj.size[1] + image_height = float(image_height) * 0.026 # convert px to cm + line_width = self.get_page_width() + width = factor * line_width + factor = (factor * line_width) / image_width + height = factor * image_height + width *= scale + height *= scale + width = '%.2fcm' % width + height = '%.2fcm' % height + return width, height + + def get_page_width(self): + """Return the document's page width in centimeters.""" + root = self.get_dom_stylesheet() + nodes = root.iterfind( + './/{urn:oasis:names:tc:opendocument:xmlns:style:1.0}' + 'page-layout/' + '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}' + 'page-layout-properties') + width = None + for node in nodes: + page_width = node.get( + '{urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0}' + 'page-width') + margin_left = node.get( + '{urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0}' + 'margin-left') + margin_right = node.get( + '{urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0}' + 'margin-right') + if (page_width is None or + margin_left is None or + margin_right is None): + continue + try: + page_width, _ = self.convert_to_cm(page_width) + margin_left, _ = self.convert_to_cm(margin_left) + margin_right, _ = self.convert_to_cm(margin_right) + except ValueError, exp: + self.document.reporter.warning( + 'Stylesheet file contains invalid page width ' + 'or margin size.') + width = page_width - margin_left - margin_right + if width is None: + # We can't find the width in styles, so we make a guess. + # Use a width of 6 in = 15.24 cm. + width = 15.24 + return width def generate_figure(self, node, source, destination, current_element): caption = None @@ -3263,14 +3445,16 @@ class ODFTranslator(nodes.GenericNodeVisitor): depart_admonition = depart_warning def generate_admonition(self, node, label, title=None): - el1 = SubElement(self.current_element, 'text:p', attrib = { - 'text:style-name': self.rststyle('admon-%s-hdr', ( label, )), - }) + translated_label = self.language.labels[label] + el1 = SubElement(self.current_element, 'text:p', attrib={ + 'text:style-name': self.rststyle( + 'admon-%s-hdr', (label, )), + }) if title: el1.text = title else: - el1.text = '%s!' % (label.capitalize(), ) - s1 = self.rststyle('admon-%s-body', ( label, )) + el1.text = '%s!' % (translated_label.capitalize(), ) + s1 = self.rststyle('admon-%s-body', (label, )) self.paragraph_style_stack.append(s1) # diff --git a/docutils/test/test_writers/test_odt.py b/docutils/test/test_writers/test_odt.py index b73b5e080..e50baed9e 100755 --- a/docutils/test/test_writers/test_odt.py +++ b/docutils/test/test_writers/test_odt.py @@ -96,6 +96,7 @@ class DocutilsOdtTestCase(DocutilsTestSupport.StandardTestCase): if settings_overrides is None: settings_overrides={} settings_overrides['_disable_config'] = True + settings_overrides['language_code'] = 'en-US' result = docutils.core.publish_string( source=input, @@ -175,6 +176,7 @@ class DocutilsOdtTestCase(DocutilsTestSupport.StandardTestCase): settings_overrides = { 'custom_header': 'Page %p% of %P%', 'custom_footer': 'Title: %t% Date: %d3% Time: %t4%', + 'language_code': 'en-US', } self.process_test('odt_custom_headfoot.txt', 'odt_custom_headfoot.odt', settings_overrides=settings_overrides, -- 2.11.4.GIT