From ac2138a3cc8f0300d5bcf84d2b0adf640307aee5 Mon Sep 17 00:00:00 2001
From: dkuhlman <dkuhlman@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>
Date: Tue, 16 May 2017 21:07:07 +0000
Subject: [PATCH] Fixes to language/region and image size control

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8069 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
---
 docutils/docutils/writers/odf_odt/__init__.py | 288 +++++++++++++++++++++-----
 docutils/test/test_writers/test_odt.py        |   2 +
 2 files changed, 238 insertions(+), 52 deletions(-)

diff --git a/docutils/docutils/writers/odf_odt/__init__.py b/docutils/docutils/writers/odf_odt/__init__.py
index 74e28ca12..286f6b330 100644
--- a/docutils/docutils/writers/odf_odt/__init__.py
+++ b/docutils/docutils/writers/odf_odt/__init__.py
@@ -24,6 +24,7 @@ import StringIO
 import copy
 import urllib2
 import docutils
+import locale
 from docutils import frontend, nodes, utils, writers, languages
 from docutils.readers import standalone
 from docutils.transforms import references
@@ -569,6 +570,48 @@ class Writer(writers.Writer):
         s1 = self.create_meta()
         self.write_zip_str(zfile, 'meta.xml', s1)
         s1 = self.get_stylesheet()
+        # Set default language in document to be generated.
+        # Language is specified by the -l/--language command line option.
+        # Allowed values are "ll", "ll-rr" or "ll_rr", where ll is language
+        # and rr is region.  If region is omitted, we use
+        # locale.normalize(ll) to obtain a region.
+        language_code = None
+        region_code = None
+        if len(self.visitor.normalized_language_code) > 0:
+            language_ids = self.visitor.normalized_language_code[0].split('-')
+            if len(language_ids) == 2:
+                language_code = language_ids[0]
+                region_code = language_ids[1]
+            elif len(language_ids) == 1:
+                language_code = language_ids[0]
+                rcode = locale.normalize(language_code)
+                rcode = rcode.split('_')
+                if len(rcode) > 1:
+                    rcode = rcode[1]
+                    rcode = rcode.split('.')
+                    if len(rcode) >= 1:
+                        region_code = rcode[0]
+                if region_code is None:
+                    raise RuntimeError(
+                        'invalid language-region.  '
+                        'Could not find region with locale.normalize().  '
+                        'If language is supplied, then you must specify '
+                        'both lanauge and region (ll-rr).  Examples: '
+                        'es-mx (Spanish, Mexico), en-au (English, Australia).')
+        else:
+            raise RuntimeError(
+                'invalid language-region. '
+                'Format must be "ll-rr" or "ll_rr", where ll is language '
+                'and rr is region. '
+                'See https://en.wikipedia.org/wiki/IETF_language_tag')
+        # Update the style ElementTree with the language and region.
+        # Note that we keep a reference to the modified node because
+        # it is possible that ElementTree will throw away the Python
+        # representation of the updated node if we do not.
+        updated, new_dom_styles, updated_node = self.update_stylesheet(
+            self.visitor.get_dom_stylesheet(), language_code, region_code)
+        if updated:
+            s1 = etree.tostring(new_dom_styles)
         self.write_zip_str(zfile, 'styles.xml', s1)
         self.store_embedded_files(zfile)
         self.copy_from_stylesheet(zfile)
@@ -580,7 +623,58 @@ class Writer(writers.Writer):
         self.parts['encoding'] = self.document.settings.output_encoding
         self.parts['version'] = docutils.__version__
 
-    def write_zip_str(self, zfile, name, bytes, compress_type=zipfile.ZIP_DEFLATED):
+    def update_stylesheet(self, stylesheet_root, language_code, region_code):
+        """Write the language and region/country into the default styles.
+
+        Sets the language and country attributes (plain and "-complex"
+        variants) on the text-properties child of every default-style
+        whose family is 'paragraph' or 'graphic'.  A code that is None
+        is not written; if both are None the tree is not searched.
+
+        Returns the tuple (updated, stylesheet_root, modified_nodes):
+        updated tells whether any attribute was written, and
+        modified_nodes is the set of text-properties elements changed.
+        Raises RuntimeError if the 'styles' element, any default-style,
+        or the text-properties of a matching default-style is missing.
+        """
+        office_ns = '{urn:oasis:names:tc:opendocument:xmlns:office:1.0}'
+        style_ns = '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}'
+        fo_ns = '{urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0}'
+        changed = set()
+        if language_code is None and region_code is None:
+            return False, stylesheet_root, changed
+        styles_el = stylesheet_root.find(office_ns + 'styles')
+        if styles_el is None:
+            raise RuntimeError(
+                "Cannot find 'styles' element in styles.odt/styles.xml")
+        default_styles = styles_el.findall(style_ns + 'default-style')
+        if not default_styles:
+            raise RuntimeError(
+                "Cannot find 'default-style' "
+                "element in styles.xml")
+        for default_style in default_styles:
+            family = default_style.attrib.get(style_ns + 'family')
+            if family not in ('paragraph', 'graphic'):
+                continue
+            text_props = default_style.find(style_ns + 'text-properties')
+            if text_props is None:
+                raise RuntimeError(
+                    "Cannot find 'text-properties' "
+                    "element in styles.xml")
+            if language_code is not None:
+                text_props.attrib[fo_ns + 'language'] = language_code
+                text_props.attrib[
+                    style_ns + 'language-complex'] = language_code
+            if region_code is not None:
+                text_props.attrib[fo_ns + 'country'] = region_code
+                text_props.attrib[
+                    style_ns + 'country-complex'] = region_code
+            # At least one of the two codes is set here, so the node changed.
+            changed.add(text_props)
+        return bool(changed), stylesheet_root, changed
+
+    def write_zip_str(
+            self, zfile, name, bytes, compress_type=zipfile.ZIP_DEFLATED):
         localtime = time.localtime(time.time())
         zinfo = zipfile.ZipInfo(name, localtime)
         # Add some standard UNIX file access permissions (-rw-r--r--).
@@ -725,8 +819,8 @@ class Writer(writers.Writer):
         #s1 = doc.toprettyxml('  ')
         return s1
 
-# class ODFTranslator(nodes.SparseNodeVisitor):
 
+# class ODFTranslator(nodes.SparseNodeVisitor):
 class ODFTranslator(nodes.GenericNodeVisitor):
 
     used_styles = (
@@ -784,15 +878,19 @@ class ODFTranslator(nodes.GenericNodeVisitor):
         'lineblock5',
         'lineblock6',
         'image', 'figureframe',
-        )
+    )
 
     def __init__(self, document):
         #nodes.SparseNodeVisitor.__init__(self, document)
         nodes.GenericNodeVisitor.__init__(self, document)
         self.settings = document.settings
-        lcode = self.settings.language_code
-        self.language = languages.get_language(lcode, document.reporter)
-        self.format_map = { }
+        self.language_code = self.settings.language_code
+        self.language = languages.get_language(
+            self.language_code,
+            document.reporter)
+        self.normalized_language_code = languages.normalize_language_tag(
+            self.language_code)
+        self.format_map = {}
         if self.settings.odf_config_file:
             from ConfigParser import ConfigParser
 
@@ -802,7 +900,7 @@ class ODFTranslator(nodes.GenericNodeVisitor):
                 if rststyle not in self.used_styles:
                     self.document.reporter.warning(
                         'Style "%s" is not a style used by odtwriter.' % (
-                        rststyle, ))
+                            rststyle, ))
                 self.format_map[rststyle] = format.decode('utf-8')
         self.section_level = 0
         self.section_count = 0
@@ -982,18 +1080,18 @@ class ODFTranslator(nodes.GenericNodeVisitor):
                 'style:name': style_name,
                 'style:master-page-name': "rststyle-pagedefault",
                 'style:family': "paragraph",
-                }, nsdict=SNSD)
+            }, nsdict=SNSD)
         if current_style:
             el1.set('style:parent-style-name', current_style)
         el.set('text:style-name', style_name)
 
-
-    def rststyle(self, name, parameters=( )):
+    def rststyle(self, name, parameters=()):
         """
         Returns the style name to use for the given style.
 
-        If `parameters` is given `name` must contain a matching number of ``%`` and
-        is used as a format expression with `parameters` as the value.
+        If `parameters` is given `name` must contain a matching number of
+        ``%`` and is used as a format expression with `parameters` as
+        the value.
         """
         name1 = name % parameters
         stylename = self.format_map.get(name1, 'rststyle-%s' % name1)
@@ -1010,6 +1108,9 @@ class ODFTranslator(nodes.GenericNodeVisitor):
         new_content = etree.tostring(self.dom_stylesheet)
         return new_content
 
+    def get_dom_stylesheet(self):
+        return self.dom_stylesheet  # the element tree, not its serialization
+
     def setup_paper(self, root_el):
         try:
             fin = os.popen("paperconf -s 2> /dev/null")
@@ -2118,70 +2219,151 @@ class ODFTranslator(nodes.GenericNodeVisitor):
 
     def get_image_width_height(self, node, attr):
+        """Return (size, unit) for image attribute `attr`: centimeters,
+        a fraction with unit '%' (width only), or (None, None) when the
+        attribute is absent or invalid (a warning is reported)."""
         size = None
+        unit = None
         if attr in node.attributes:
             size = node.attributes[attr]
-            unit = size[-2:]
-            if unit.isalpha():
-                size = size[:-2]
-            else:
-                unit = 'px'
+            size = size.strip()
             try:
-                size = float(size)
-            except ValueError, e:
+                if size.endswith('%'):
+                    if attr == 'height':
+                        # Percentage allowed for width but not height.
+                        raise ValueError('percentage not allowed for height')
+                    size = float(size.rstrip(' %')) / 100.0
+                    unit = '%'
+                else:
+                    size, unit = self.convert_to_cm(size)
+            except ValueError, exp:
                 self.document.reporter.warning(
-                    'Invalid %s for image: "%s"' % (
-                        attr, node.attributes[attr]))
-            size = [size, unit]
-        return size
+                    'Invalid %s for image: "%s".  Error: "%s".' % (
+                        attr, node.attributes[attr], exp))
+                size, unit = None, None
+        return size, unit
+
+    def convert_to_cm(self, size):
+        """Convert a length with a unit suffix to centimeters.
+
+        Returns the tuple (value, 'cm').  Raises ValueError for an unknown
+        unit or non-numeric value: wrap calls in try/except ValueError.
+        """
+        size = size.strip()
+        if size.endswith('px'):
+            size = float(size[:-2]) * 0.026     # convert px to cm
+        elif size.endswith('in'):
+            size = float(size[:-2]) * 2.54      # convert in to cm
+        elif size.endswith('pt'):
+            size = float(size[:-2]) * 0.035     # convert pt to cm
+        elif size.endswith('pc'):
+            size = float(size[:-2]) * 0.4233    # pica = 1/6 in, to cm
+        elif size.endswith('mm'):
+            size = float(size[:-2]) * 0.1       # convert mm to cm
+        elif size.endswith('cm'):
+            size = float(size[:-2])
+        else:
+            raise ValueError('unknown unit type')
+        unit = 'cm'
+        return size, unit
 
     def get_image_scale(self, node):
         if 'scale' in node.attributes:
+            scale = 100     # percent; kept if the attribute is invalid
             try:
-                scale = int(node.attributes['scale'])
-                if scale < 1: # or scale > 100:
-                    self.document.reporter.warning(
-                        'scale out of range (%s), using 1.' % (scale, ))
-                    scale = 1
-                scale = scale * 0.01
-            except ValueError, e:
+                scale = int(node.attributes['scale'])
+            except ValueError:
                 self.document.reporter.warning(
                     'Invalid scale for image: "%s"' % (
                         node.attributes['scale'], ))
+            if scale < 1:       # or scale > 100:
+                self.document.reporter.warning(
+                    'scale out of range (%s), using 1.' % (scale, ))
+                scale = 1
+            scale = scale * 0.01
         else:
             scale = 1.0
         return scale
 
     def get_image_scaled_width_height(self, node, source):
+        """Return the image (width, height) as '%.2fcm' strings.
+        Sizes not given by the node come from the image file; a
+        percentage width is taken relative to get_page_width().
+        """
         scale = self.get_image_scale(node)
-        width = self.get_image_width_height(node, 'width')
-        height = self.get_image_width_height(node, 'height')
-
+        width, width_unit = self.get_image_width_height(node, 'width')
+        height, _ = self.get_image_width_height(node, 'height')
         dpi = (72, 72)
         if PIL is not None and source in self.image_dict:
             filename, destination = self.image_dict[source]
             imageobj = PIL.Image.open(filename, 'r')
             dpi = imageobj.info.get('dpi', dpi)
             # dpi information can be (xdpi, ydpi) or xydpi
-            try: iter(dpi)
-            except: dpi = (dpi, dpi)
+            try:
+                iter(dpi)
+            except TypeError:
+                dpi = (dpi, dpi)
         else:
             imageobj = None
-
         if width is None or height is None:
             if imageobj is None:
                 raise RuntimeError(
                     'image size not fully specified and PIL not installed')
-            if width is None: width = [imageobj.size[0], 'px']
-            if height is None: height = [imageobj.size[1], 'px']
-
-        width[0] *= scale
-        height[0] *= scale
-        if width[1] == 'px': width = [width[0] / dpi[0], 'in']
-        if height[1] == 'px': height = [height[0] / dpi[1], 'in']
-
-        width[0] = str(width[0])
-        height[0] = str(height[0])
-        return ''.join(width), ''.join(height)
+            # Natural image size in cm (0.026 cm per pixel).
+            image_width = float(imageobj.size[0]) * 0.026
+            image_height = float(imageobj.size[1]) * 0.026
+        if width_unit == '%':
+            # The fraction is relative to the text line width.
+            width *= self.get_page_width()
+            if height is None:
+                # No explicit height: keep the image's aspect ratio.
+                height = width / image_width * image_height
+        if width is None:
+            width = image_width
+        if height is None:
+            height = image_height
+        width *= scale
+        height *= scale
+        width = '%.2fcm' % width
+        height = '%.2fcm' % height
+        return width, height
+
+    def get_page_width(self):
+        """Return the width of the text area in centimeters.
+
+        This is the page width minus the left and right margins of the
+        last usable page layout in the stylesheet, or 15.24 cm (6 in)
+        if the stylesheet has none.
+        """
+        style_ns = '{urn:oasis:names:tc:opendocument:xmlns:style:1.0}'
+        fo_ns = '{urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0}'
+        root = self.get_dom_stylesheet()
+        layouts = root.iterfind(
+            './/' + style_ns + 'page-layout/' +
+            style_ns + 'page-layout-properties')
+        width = None
+        for layout in layouts:
+            page_width = layout.get(fo_ns + 'page-width')
+            margin_left = layout.get(fo_ns + 'margin-left')
+            margin_right = layout.get(fo_ns + 'margin-right')
+            if (page_width is None or
+                    margin_left is None or
+                    margin_right is None):
+                continue
+            try:
+                page_width, _ = self.convert_to_cm(page_width)
+                margin_left, _ = self.convert_to_cm(margin_left)
+                margin_right, _ = self.convert_to_cm(margin_right)
+            except ValueError:
+                self.document.reporter.warning(
+                    'Stylesheet file contains invalid page width '
+                    'or margin size.')
+                # Skip this layout: its lengths could not be converted.
+                continue
+            width = page_width - margin_left - margin_right
+        if width is None:
+            # No usable layout in the styles: guess 6 in = 15.24 cm.
+            width = 15.24
+        return width
 
     def generate_figure(self, node, source, destination, current_element):
         caption = None
@@ -3263,14 +3445,16 @@ class ODFTranslator(nodes.GenericNodeVisitor):
     depart_admonition = depart_warning
 
     def generate_admonition(self, node, label, title=None):
-        el1 = SubElement(self.current_element, 'text:p', attrib = {
-            'text:style-name': self.rststyle('admon-%s-hdr', ( label, )),
-            })
+        """Emit the admonition header and push its body paragraph style."""
+        translated_label = self.language.labels.get(label, label)
+        el1 = SubElement(self.current_element, 'text:p', attrib={
+            'text:style-name': self.rststyle('admon-%s-hdr', (label, )),
+        })
         if title:
             el1.text = title
         else:
-            el1.text = '%s!' % (label.capitalize(), )
-        s1 = self.rststyle('admon-%s-body', ( label, ))
+            el1.text = '%s!' % (translated_label.capitalize(), )
+        s1 = self.rststyle('admon-%s-body', (label, ))
         self.paragraph_style_stack.append(s1)
 
     #
diff --git a/docutils/test/test_writers/test_odt.py b/docutils/test/test_writers/test_odt.py
index b73b5e080..e50baed9e 100755
--- a/docutils/test/test_writers/test_odt.py
+++ b/docutils/test/test_writers/test_odt.py
@@ -96,6 +96,7 @@ class DocutilsOdtTestCase(DocutilsTestSupport.StandardTestCase):
         if settings_overrides is None:
             settings_overrides={}
             settings_overrides['_disable_config'] = True
+            settings_overrides['language_code'] = 'en-US'
 
         result = docutils.core.publish_string(
             source=input,
@@ -175,6 +176,7 @@ class DocutilsOdtTestCase(DocutilsTestSupport.StandardTestCase):
         settings_overrides = {
             'custom_header': 'Page %p% of %P%',
             'custom_footer': 'Title: %t%  Date: %d3%  Time: %t4%',
+            'language_code': 'en-US',
             }
         self.process_test('odt_custom_headfoot.txt', 'odt_custom_headfoot.odt',
             settings_overrides=settings_overrides,
-- 
2.11.4.GIT