lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     inset_types = ["Foot", "Note"]
 250     if document.cjk_encoding != '':
 251         return
 252     encoding_stack = [document.encoding]
 253     insets = []
 254     lang_re = re.compile(r"^\\lang\s(\S+)")
 255     inset_re = re.compile(r"^\\begin_inset\s(\S+)")
 256     if not forward: # no need to read file unless we are reverting
 257         spec_chars = read_unicodesymbols()
 258
 259     if document.inputencoding == "auto" or document.inputencoding == "default":
 260         i = 0
 261         while i < len(document.body):
 262             result = lang_re.match(document.body[i])
 263             if result:
 264                 language = result.group(1)
 265                 if language == "default":
 266                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 267                     encoding_stack[-1] = document.encoding
 268                 else:
 269                     from lyx2lyx_lang import lang
 270                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 271                     encoding_stack[-1] = lang[language][3]
 272             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 273                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 274                 if len(insets) > 0 and insets[-1] in inset_types:
 275                     from lyx2lyx_lang import lang
 276                     encoding_stack.append(lang[document.language][3])
 277                 else:
 278                     encoding_stack.append(encoding_stack[-1])
 279             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 280                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 281                 if len(encoding_stack) == 1:
 282                     # Don't remove the document encoding from the stack
 283                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 284                 else:
 285                     del encoding_stack[-1]
 286             elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
 287                 inset_result = inset_re.match(document.body[i])
 288                 if inset_result:
 289                     insets.append(inset_result.group(1))
 290                 else:
 291                     insets.append("")
 292             elif find_token(document.body, "\\end_inset", i, i + 1) == i:
 293                 del insets[-1]
 294             if encoding_stack[-1] != document.encoding:
 295                 if forward:
 296                     # This line has been incorrectly interpreted as if it was
 297                     # encoded in 'encoding'.
 298                     # Convert back to the 8bit string that was in the file.
 299                     orig = document.body[i].encode(document.encoding)
 300                     # Convert the 8bit string that was in the file to unicode
 301                     # with the correct encoding.
 302                     document.body[i] = orig.decode(encoding_stack[-1])
 303                 else:
 304                     try:
 305                         # Convert unicode to the 8bit string that will be written
 306                         # to the file with the correct encoding.
 307                         orig = document.body[i].encode(encoding_stack[-1])
 308                         # Convert the 8bit string that will be written to the
 309                         # file to fake unicode with the encoding that will later
 310                         # be used when writing to the file.
 311                         document.body[i] = orig.decode(document.encoding)
 312                     except:
 313                         last_char = document.body[i-1][-1]
 314                         mod_line, last_char = revert_unicode_line(document, i, last_char, insets, spec_chars)
 315                         document.body[i:i+1] = mod_line.split('\n')
 316                         i += len(mod_line.split('\n')) - 1
 317             i += 1
 318
 319
 320 def convert_utf8(document):
 321     " Set document encoding to UTF-8. "
 322     convert_multiencoding(document, True)
 323     document.encoding = "utf8"
 324
 325
 326 def revert_utf8(document):
 327     " Set document encoding to the value corresponding to inputencoding. "
 328     i = find_token(document.header, "\\inputencoding", 0)
 329     if i == -1:
 330         document.header.append("\\inputencoding auto")
 331     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 332         document.header[i] = "\\inputencoding auto"
 333     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 334     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 335     convert_multiencoding(document, False)
 336
 337
 338 def read_unicodesymbols():
 339     " Read the unicodesymbols list of unicode characters and corresponding commands."
 340     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 341     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 342     spec_chars = {}
 343     for line in fp.readlines():
 344         if line[0] != '#':
 345             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 346             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 347             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 348             try:
 349                 # flag1 and flag2 are preamble and other flags
 350                 [ucs4,command,flag1,flag2] =line.split(None,3)
 351                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 352             except:
 353                 pass
 354     fp.close()
 355     return spec_chars
 356
 357
 358 def revert_unicode_line(document, i, last_char, insets, spec_chars, replacement_character = '???'):
 359     # Define strings to start and end ERT and math insets
 360     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
 361     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 362     math_intro='\n\\begin_inset Formula $'
 363     math_outro='$\n\\end_inset'
 364
 365     mod_line = u''
 366     line = document.body[i]
 367     for character in line:
 368         try:
 369             # Try to write the character
 370             dummy = character.encode(document.encoding)
 371             mod_line += character
 372             last_char = character
 373         except:
 374             # Try to replace with ERT/math inset
 375             if spec_chars.has_key(character):
 376                 command = spec_chars[character][0] # the command to replace unicode
 377                 flag1 = spec_chars[character][1]
 378                 flag2 = spec_chars[character][2]
 379                 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 380                     # We have a character that should be combined with the previous
 381                     command += '{' + last_char + '}'
 382                     # Remove the last character. Ignore if it is whitespace
 383                     if len(last_char.rstrip()):
 384                         # last_char was found and is not whitespace
 385                         if mod_line:
 386                             mod_line = mod_line[:-1]
 387                         else: # last_char belongs to the last line
 388                             document.body[i-1] = document.body[i-1][:-1]
 389                     else:
 390                         # The last character was replaced by a command. For now it is
 391                         # ignored. This could be handled better.
 392                         pass
 393                 if command[0:2] == '\\\\':
 394                     if command[2:12]=='ensuremath':
 395                         if insets and insets[-1] == "ERT":
 396                             # math in ERT
 397                             command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 398                             command = command.replace('}', '$\n')
 399                         elif not insets or insets[-1] != "Formula":
 400                             # add a math inset with the replacement character
 401                             command = command.replace('\\\\ensuremath{\\', math_intro)
 402                             command = command.replace('}', math_outro)
 403                         else:
 404                             # we are already in a math inset
 405                             command = command.replace('\\\\ensuremath{\\', '')
 406                             command = command.replace('}', '')
 407                     else:
 408                         if insets and insets[-1] == "Formula":
 409                             # avoid putting an ERT in a math; instead put command as text
 410                             command = command.replace('\\\\', '\mathrm{')
 411                             command = command + '}'
 412                         elif not insets or insets[-1] != "ERT":
 413                             # add an ERT inset with the replacement character
 414                             command = command.replace('\\\\', ert_intro)
 415                             command = command + ert_outro
 416                         else:
 417                             command = command.replace('\\\\', '\n\\backslash\n')
 418                     last_char = '' # indicate that the character should not be removed
 419                 mod_line += command
 420             else:
 421                 # Replace with replacement string
 422                 mod_line += replacement_character
 423     return mod_line, last_char
 424
 425
 426 def revert_unicode(document):
 427     '''Transform unicode characters that can not be written using the
 428 document encoding to commands according to the unicodesymbols
 429 file. Characters that can not be replaced by commands are replaced by
 430 an replacement string.  Flags other than 'combined' are currently not
 431 implemented.'''
 432     spec_chars = read_unicodesymbols()
 433     insets = [] # list of active insets
 434     last_char = '' # to store the previous character
 435
 436     # Go through the document to capture all combining characters
 437     i = 0
 438     while i < len(document.body):
 439         line = document.body[i]
 440         # Check for insets
 441         if line.find('\\begin_inset') > -1:
 442             insets.append(line[13:].split()[0])
 443         if line.find('\\end_inset') > -1:
 444             del insets[-1]
 445
 446         # Try to write the line
 447         try:
 448             # If all goes well the line is written here
 449             dummy = line.encode(document.encoding)
 450             last_char = line[-1]
 451             i += 1
 452         except:
 453             # Error, some character(s) in the line need to be replaced
 454             mod_line, last_char = revert_unicode_line(document, i, last_char, insets, spec_chars)
 455             document.body[i:i+1] = mod_line.split('\n')
 456             i += len(mod_line.split('\n'))
 457
 458
 459 def revert_cs_label(document):
 460     " Remove status flag of charstyle label. "
 461     i = 0
 462     while 1:
 463         i = find_token(document.body, "\\begin_inset CharStyle", i)
 464         if i == -1:
 465             return
 466         # Seach for a line starting 'show_label'
 467         # If it is not there, break with a warning message
 468         i = i + 1
 469         while 1:
 470             if (document.body[i][:10] == "show_label"):
 471                 del document.body[i]
 472                 break
 473             elif (document.body[i][:13] == "\\begin_layout"):
 474                 document.warning("Malformed LyX document: Missing 'show_label'.")
 475                 break
 476             i = i + 1
 477
 478         i = i + 1
 479
 480
 481 def convert_bibitem(document):
 482     """ Convert
 483 \bibitem [option]{argument}
 484
 485 to
 486
 487 \begin_inset LatexCommand bibitem
 488 label "option"
 489 key "argument"
 490
 491 \end_inset
 492
 493 This must be called after convert_commandparams.
 494 """
 495     i = 0
 496     while 1:
 497         i = find_token(document.body, "\\bibitem", i)
 498         if i == -1:
 499             break
 500         j = document.body[i].find('[') + 1
 501         k = document.body[i].rfind(']')
 502         if j == 0: # No optional argument found
 503             option = None
 504         else:
 505             option = document.body[i][j:k]
 506         j = document.body[i].rfind('{') + 1
 507         k = document.body[i].rfind('}')
 508         argument = document.body[i][j:k]
 509         lines = ['\\begin_inset LatexCommand bibitem']
 510         if option != None:
 511             lines.append('label "%s"' % option.replace('"', '\\"'))
 512         lines.append('key "%s"' % argument.replace('"', '\\"'))
 513         lines.append('')
 514         lines.append('\\end_inset')
 515         document.body[i:i+1] = lines
 516         i = i + 1
 517
 518
 519 commandparams_info = {
 520     # command : [option1, option2, argument]
 521     "bibitem" : ["label", "", "key"],
 522     "bibtex" : ["options", "btprint", "bibfiles"],
 523     "cite"        : ["after", "before", "key"],
 524     "citet"       : ["after", "before", "key"],
 525     "citep"       : ["after", "before", "key"],
 526     "citealt"     : ["after", "before", "key"],
 527     "citealp"     : ["after", "before", "key"],
 528     "citeauthor"  : ["after", "before", "key"],
 529     "citeyear"    : ["after", "before", "key"],
 530     "citeyearpar" : ["after", "before", "key"],
 531     "citet*"      : ["after", "before", "key"],
 532     "citep*"      : ["after", "before", "key"],
 533     "citealt*"    : ["after", "before", "key"],
 534     "citealp*"    : ["after", "before", "key"],
 535     "citeauthor*" : ["after", "before", "key"],
 536     "Citet"       : ["after", "before", "key"],
 537     "Citep"       : ["after", "before", "key"],
 538     "Citealt"     : ["after", "before", "key"],
 539     "Citealp"     : ["after", "before", "key"],
 540     "Citeauthor"  : ["after", "before", "key"],
 541     "Citet*"      : ["after", "before", "key"],
 542     "Citep*"      : ["after", "before", "key"],
 543     "Citealt*"    : ["after", "before", "key"],
 544     "Citealp*"    : ["after", "before", "key"],
 545     "Citeauthor*" : ["after", "before", "key"],
 546     "citefield"   : ["after", "before", "key"],
 547     "citetitle"   : ["after", "before", "key"],
 548     "cite*"       : ["after", "before", "key"],
 549     "hfill" : ["", "", ""],
 550     "index"      : ["", "", "name"],
 551     "printindex" : ["", "", "name"],
 552     "label" : ["", "", "name"],
 553     "eqref"     : ["name", "", "reference"],
 554     "pageref"   : ["name", "", "reference"],
 555     "prettyref" : ["name", "", "reference"],
 556     "ref"       : ["name", "", "reference"],
 557     "vpageref"  : ["name", "", "reference"],
 558     "vref"      : ["name", "", "reference"],
 559     "tableofcontents" : ["", "", "type"],
 560     "htmlurl" : ["name", "", "target"],
 561     "url"     : ["name", "", "target"]}
 562
 563
 564 def convert_commandparams(document):
 565     """ Convert
 566
 567  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 568  \end_inset
 569
 570  to
 571
 572  \begin_inset LatexCommand cmdname
 573  name1 "opt1"
 574  name2 "opt2"
 575  name3 "arg"
 576  \end_inset
 577
 578  name1, name2 and name3 can be different for each command.
 579 """
 580     # \begin_inset LatexCommand bibitem was not the official version (see
 581     # convert_bibitem()), but could be read in, so we convert it here, too.
 582
 583     i = 0
 584     while 1:
 585         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 586         if i == -1:
 587             break
 588         command = document.body[i][26:].strip()
 589         if command == "":
 590             document.warning("Malformed LyX document: Missing LatexCommand name.")
 591             i = i + 1
 592             continue
 593
 594         j = find_token(document.body, "\\end_inset", i + 1)
 595         if j == -1:
 596             document.warning("Malformed document")
 597         else:
 598             command += "".join(document.body[i+1:j])
 599             document.body[i+1:j] = []
 600
 601         # The following parser is taken from the original InsetCommandParams::scanCommand
 602         name = ""
 603         option1 = ""
 604         option2 = ""
 605         argument = ""
 606         state = "WS"
 607         # Used to handle things like \command[foo[bar]]{foo{bar}}
 608         nestdepth = 0
 609         b = 0
 610         for c in command:
 611             if ((state == "CMDNAME" and c == ' ') or
 612                 (state == "CMDNAME" and c == '[') or
 613                 (state == "CMDNAME" and c == '{')):
 614                 state = "WS"
 615             if ((state == "OPTION" and c == ']') or
 616                 (state == "SECOPTION" and c == ']') or
 617                 (state == "CONTENT" and c == '}')):
 618                 if nestdepth == 0:
 619                     state = "WS"
 620                 else:
 621                     nestdepth = nestdepth - 1
 622             if ((state == "OPTION" and c == '[') or
 623                 (state == "SECOPTION" and c == '[') or
 624                 (state == "CONTENT" and c == '{')):
 625                 nestdepth = nestdepth + 1
 626             if state == "CMDNAME":
 627                     name += c
 628             elif state == "OPTION":
 629                     option1 += c
 630             elif state == "SECOPTION":
 631                     option2 += c
 632             elif state == "CONTENT":
 633                     argument += c
 634             elif state == "WS":
 635                 if c == '\\':
 636                     state = "CMDNAME"
 637                 elif c == '[' and b != ']':
 638                     state = "OPTION"
 639                     nestdepth = 0 # Just to be sure
 640                 elif c == '[' and b == ']':
 641                     state = "SECOPTION"
 642                     nestdepth = 0 # Just to be sure
 643                 elif c == '{':
 644                     state = "CONTENT"
 645                     nestdepth = 0 # Just to be sure
 646             b = c
 647
 648         # Now we have parsed the command, output the parameters
 649         lines = ["\\begin_inset LatexCommand %s" % name]
 650         if option1 != "":
 651             if commandparams_info[name][0] == "":
 652                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 653             else:
 654                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 655         if option2 != "":
 656             if commandparams_info[name][1] == "":
 657                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 658             else:
 659                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 660         if argument != "":
 661             if commandparams_info[name][2] == "":
 662                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 663             else:
 664                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 665         document.body[i:i+1] = lines
 666         i = i + 1
 667
 668
 669 def revert_commandparams(document):
 670     regex = re.compile(r'(\S+)\s+(.+)')
 671     i = 0
 672     while 1:
 673         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 674         if i == -1:
 675             break
 676         name = document.body[i].split()[2]
 677         j = find_end_of_inset(document.body, i + 1)
 678         preview_line = ""
 679         option1 = ""
 680         option2 = ""
 681         argument = ""
 682         for k in range(i + 1, j):
 683             match = re.match(regex, document.body[k])
 684             if match:
 685                 pname = match.group(1)
 686                 pvalue = match.group(2)
 687                 if pname == "preview":
 688                     preview_line = document.body[k]
 689                 elif (commandparams_info[name][0] != "" and
 690                       pname == commandparams_info[name][0]):
 691                     option1 = pvalue.strip('"').replace('\\"', '"')
 692                 elif (commandparams_info[name][1] != "" and
 693                       pname == commandparams_info[name][1]):
 694                     option2 = pvalue.strip('"').replace('\\"', '"')
 695                 elif (commandparams_info[name][2] != "" and
 696                       pname == commandparams_info[name][2]):
 697                     argument = pvalue.strip('"').replace('\\"', '"')
 698             elif document.body[k].strip() != "":
 699                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 700         if name == "bibitem":
 701             if option1 == "":
 702                 lines = ["\\bibitem {%s}" % argument]
 703             else:
 704                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 705         else:
 706             if option1 == "":
 707                 if option2 == "":
 708                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 709                 else:
 710                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 711             else:
 712                 if option2 == "":
 713                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 714                 else:
 715                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 716         if name != "bibitem":
 717             if preview_line != "":
 718                 lines.append(preview_line)
 719             lines.append('')
 720             lines.append('\\end_inset')
 721         document.body[i:j+1] = lines
 722         i = j + 1
 723
 724
 725 def revert_nomenclature(document):
 726     " Convert nomenclature entry to ERT. "
 727     regex = re.compile(r'(\S+)\s+(.+)')
 728     i = 0
 729     use_nomencl = 0
 730     while 1:
 731         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 732         if i == -1:
 733             break
 734         use_nomencl = 1
 735         j = find_end_of_inset(document.body, i + 1)
 736         preview_line = ""
 737         symbol = ""
 738         description = ""
 739         prefix = ""
 740         for k in range(i + 1, j):
 741             match = re.match(regex, document.body[k])
 742             if match:
 743                 name = match.group(1)
 744                 value = match.group(2)
 745                 if name == "preview":
 746                     preview_line = document.body[k]
 747                 elif name == "symbol":
 748                     symbol = value.strip('"').replace('\\"', '"')
 749                 elif name == "description":
 750                     description = value.strip('"').replace('\\"', '"')
 751                 elif name == "prefix":
 752                     prefix = value.strip('"').replace('\\"', '"')
 753             elif document.body[k].strip() != "":
 754                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 755         if prefix == "":
 756             command = 'nomenclature{%s}{%s}' % (symbol, description)
 757         else:
 758             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 759         document.body[i:j+1] = ['\\begin_inset ERT',
 760                                 'status collapsed',
 761                                 '',
 762                                 '\\begin_layout %s' % document.default_layout,
 763                                 '',
 764                                 '',
 765                                 '\\backslash',
 766                                 command,
 767                                 '\\end_layout',
 768                                 '',
 769                                 '\\end_inset']
 770         i = i + 11
 771     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 772         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 773         document.preamble.append('\\makenomenclature')
 774
 775
 776 def revert_printnomenclature(document):
 777     " Convert printnomenclature to ERT. "
 778     regex = re.compile(r'(\S+)\s+(.+)')
 779     i = 0
 780     use_nomencl = 0
 781     while 1:
 782         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 783         if i == -1:
 784             break
 785         use_nomencl = 1
 786         j = find_end_of_inset(document.body, i + 1)
 787         preview_line = ""
 788         labelwidth = ""
 789         for k in range(i + 1, j):
 790             match = re.match(regex, document.body[k])
 791             if match:
 792                 name = match.group(1)
 793                 value = match.group(2)
 794                 if name == "preview":
 795                     preview_line = document.body[k]
 796                 elif name == "labelwidth":
 797                     labelwidth = value.strip('"').replace('\\"', '"')
 798             elif document.body[k].strip() != "":
 799                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 800         if labelwidth == "":
 801             command = 'nomenclature{}'
 802         else:
 803             command = 'nomenclature[%s]' % labelwidth
 804         document.body[i:j+1] = ['\\begin_inset ERT',
 805                                 'status collapsed',
 806                                 '',
 807                                 '\\begin_layout %s' % document.default_layout,
 808                                 '',
 809                                 '',
 810                                 '\\backslash',
 811                                 command,
 812                                 '\\end_layout',
 813                                 '',
 814                                 '\\end_inset']
 815         i = i + 11
 816     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 817         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 818         document.preamble.append('\\makenomenclature')
 819
 820
 821 def convert_esint(document):
 822     " Add \\use_esint setting to header. "
 823     i = find_token(document.header, "\\cite_engine", 0)
 824     if i == -1:
 825         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 826         return
 827     # 0 is off, 1 is auto, 2 is on.
 828     document.header.insert(i, '\\use_esint 0')
 829
 830
 831 def revert_esint(document):
 832     " Remove \\use_esint setting from header. "
 833     i = find_token(document.header, "\\use_esint", 0)
 834     if i == -1:
 835         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 836         return
 837     use_esint = document.header[i].split()[1]
 838     del document.header[i]
 839     # 0 is off, 1 is auto, 2 is on.
 840     if (use_esint == 2):
 841         document.preamble.append('\\usepackage{esint}')
 842
 843
 844 def revert_clearpage(document):
 845     " clearpage -> ERT "
 846     i = 0
 847     while 1:
 848         i = find_token(document.body, "\\clearpage", i)
 849         if i == -1:
 850             break
 851         document.body[i:i+1] =  ['\\begin_inset ERT',
 852                                 'status collapsed',
 853                                 '',
 854                                 '\\begin_layout %s' % document.default_layout,
 855                                 '',
 856                                 '',
 857                                 '\\backslash',
 858                                 'clearpage',
 859                                 '\\end_layout',
 860                                 '',
 861                                 '\\end_inset']
 862     i = i + 1
 863
 864
 865 def revert_cleardoublepage(document):
 866     " cleardoublepage -> ERT "
 867     i = 0
 868     while 1:
 869         i = find_token(document.body, "\\cleardoublepage", i)
 870         if i == -1:
 871             break
 872         document.body[i:i+1] =  ['\\begin_inset ERT',
 873                                 'status collapsed',
 874                                 '',
 875                                 '\\begin_layout %s' % document.default_layout,
 876                                 '',
 877                                 '',
 878                                 '\\backslash',
 879                                 'cleardoublepage',
 880                                 '\\end_layout',
 881                                 '',
 882                                 '\\end_inset']
 883     i = i + 1
 884
 885
 886 def convert_lyxline(document):
 887     " remove fontsize commands for \lyxline "
 888     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 889     # to change the line thickness. The new definiton does this so that imported
 890     # \lyxlines would have a different line thickness. The eventual fontsize command
 891     # before \lyxline is therefore removed to get the same output.
 892     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 893                  "large", "Large", "LARGE", "huge", "Huge"]
 894     for n in range(0, len(fontsizes)):
 895         i = 0
 896         k = 0
 897         while i < len(document.body):
 898             i = find_token(document.body, "\\size " + fontsizes[n], i)
 899             k = find_token(document.body, "\\lyxline", i)
 900             # the corresponding fontsize command is always 2 lines before the \lyxline
 901             if (i != -1 and k == i+2):
 902                 document.body[i:i+1] = []
 903             else:
 904                 break
 905         i = i + 1
 906
 907
 908 def revert_encodings(document):
 909     " Set new encodings to auto. "
 910     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 911                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 912                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 913     i = find_token(document.header, "\\inputencoding", 0)
 914     if i == -1:
 915         document.header.append("\\inputencoding auto")
 916     else:
 917         inputenc = get_value(document.header, "\\inputencoding", i)
 918         if inputenc in encodings:
 919             document.header[i] = "\\inputencoding auto"
 920     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 921
 922
 923 def convert_caption(document):
 924     " Convert caption layouts to caption insets. "
 925     i = 0
 926     while 1:
 927         i = find_token(document.body, "\\begin_layout Caption", i)
 928         if i == -1:
 929             return
 930         j = find_end_of_layout(document.body, i)
 931         if j == -1:
 932             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 933             return
 934
 935         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 936         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 937                             "\\begin_inset Caption", "",
 938                             "\\begin_layout %s" % document.default_layout]
 939         i = i + 1
 940
 941
 942 def revert_caption(document):
 943     " Convert caption insets to caption layouts. "
 944     " This assumes that the text class has a caption style. "
 945     i = 0
 946     while 1:
 947         i = find_token(document.body, "\\begin_inset Caption", i)
 948         if i == -1:
 949             return
 950
 951         # We either need to delete the previous \begin_layout line, or we
 952         # need to end the previous layout if this inset is not in the first
 953         # position of the paragraph.
 954         layout_before = find_token_backwards(document.body, "\\begin_layout", i)
 955         if layout_before == -1:
 956             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 957             return
 958         layout_line = document.body[layout_before]
 959         del_layout_before = True
 960         l = layout_before + 1
 961         while l < i:
 962             if document.body[l] != "":
 963                 del_layout_before = False
 964                 break
 965             l = l + 1
 966         if del_layout_before:
 967             del document.body[layout_before:i]
 968             i = layout_before
 969         else:
 970             document.body[i:i] = ["\\end_layout", ""]
 971             i = i + 2
 972
 973         # Find start of layout in the inset and end of inset
 974         j = find_token(document.body, "\\begin_layout", i)
 975         if j == -1:
 976             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 977             return
 978         k = find_end_of_inset(document.body, i)
 979         if k == -1:
 980             document.warning("Malformed LyX document: Missing `\\end_inset'.")
 981             return
 982
 983         # We either need to delete the following \end_layout line, or we need
 984         # to restart the old layout if this inset is not at the paragraph end.
 985         layout_after = find_token(document.body, "\\end_layout", k)
 986         if layout_after == -1:
 987             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 988             return
 989         del_layout_after = True
 990         l = k + 1
 991         while l < layout_after:
 992             if document.body[l] != "":
 993                 del_layout_after = False
 994                 break
 995             l = l + 1
 996         if del_layout_after:
 997             del document.body[k+1:layout_after+1]
 998         else:
 999             document.body[k+1:k+1] = [layout_line, ""]
1000
1001         # delete \begin_layout and \end_inset and replace \begin_inset with
1002         # "\begin_layout Caption". This works because we can only have one
1003         # paragraph in the caption inset: The old \end_layout will be recycled.
1004         del document.body[k]
1005         if document.body[k] == "":
1006             del document.body[k]
1007         del document.body[j]
1008         if document.body[j] == "":
1009             del document.body[j]
1010         document.body[i] = "\\begin_layout Caption"
1011         if document.body[i+1] == "":
1012             del document.body[i+1]
1013         i = i + 1
1014
1015
1016 # Accents of InsetLaTeXAccent
1017 accent_map = {
1018     "`" : u'\u0300', # grave
1019     "'" : u'\u0301', # acute
1020     "^" : u'\u0302', # circumflex
1021     "~" : u'\u0303', # tilde
1022     "=" : u'\u0304', # macron
1023     "u" : u'\u0306', # breve
1024     "." : u'\u0307', # dot above
1025     "\"": u'\u0308', # diaeresis
1026     "r" : u'\u030a', # ring above
1027     "H" : u'\u030b', # double acute
1028     "v" : u'\u030c', # caron
1029     "b" : u'\u0320', # minus sign below
1030     "d" : u'\u0323', # dot below
1031     "c" : u'\u0327', # cedilla
1032     "k" : u'\u0328', # ogonek
1033     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
1034                      # only one is given as argument, so we don't need to
1035                      # treat it differently.
1036 }
1037
1038
1039 # special accents of InsetLaTeXAccent without argument
1040 special_accent_map = {
1041     'i' : u'\u0131', # dotless i
1042     'j' : u'\u0237', # dotless j
1043     'l' : u'\u0142', # l with stroke
1044     'L' : u'\u0141'  # L with stroke
1045 }
1046
1047
1048 # special accent arguments of InsetLaTeXAccent
1049 accented_map = {
1050     '\\i' : u'\u0131', # dotless i
1051     '\\j' : u'\u0237'  # dotless j
1052 }
1053
1054
1055 def _convert_accent(accent, accented_char):
1056     type = accent
1057     char = accented_char
1058     if char == '':
1059         if type in special_accent_map:
1060             return special_accent_map[type]
1061         # a missing char is treated as space by LyX
1062         char = ' '
1063     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
1064         # Special caron, only used with t, d, l and L.
1065         # It is not in the map because we convert it to the same unicode
1066         # character as the normal caron: \q{} is only defined if babel with
1067         # the czech or slovak language is used, and the normal caron
1068         # produces the correct output if the T1 font encoding is used.
1069         # For the same reason we never convert to \q{} in the other direction.
1070         type = 'v'
1071     elif char in accented_map:
1072         char = accented_map[char]
1073     elif (len(char) > 1):
1074         # We can only convert accents on a single char
1075         return ''
1076     a = accent_map.get(type)
1077     if a:
1078         return unicodedata.normalize("NFC", "%s%s" % (char, a))
1079     return ''
1080
1081
1082 def convert_ertbackslash(body, i, ert, default_layout):
1083     r""" -------------------------------------------------------------------------------------------
1084     Convert backslashes and '\n' into valid ERT code, append the converted
1085     text to body[i] and return the (maybe incremented) line index i"""
1086
1087     for c in ert:
1088         if c == '\\':
1089             body[i] = body[i] + '\\backslash '
1090             i = i + 1
1091             body.insert(i, '')
1092         elif c == '\n':
1093             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
1094             i = i + 4
1095         else:
1096             body[i] = body[i] + c
1097     return i
1098
1099
1100 def convert_accent(document):
1101     # The following forms are supported by LyX:
1102     # '\i \"{a}' (standard form, as written by LyX)
1103     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1104     # '\i \"{ }' (also accepted if the accented char is a space)
1105     # '\i \" a'  (also accepted)
1106     # '\i \"'    (also accepted)
1107     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1108     re_contents = re.compile(r'^([^\s{]+)(.*)$')
1109     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1110     i = 0
1111     while 1:
1112         i = find_re(document.body, re_wholeinset, i)
1113         if i == -1:
1114             return
1115         match = re_wholeinset.match(document.body[i])
1116         prefix = match.group(1)
1117         contents = match.group(3).strip()
1118         match = re_contents.match(contents)
1119         if match:
1120             # Strip first char (always \)
1121             accent = match.group(1)[1:]
1122             accented_contents = match.group(2).strip()
1123             match = re_accentedcontents.match(accented_contents)
1124             accented_char = match.group(1)
1125             converted = _convert_accent(accent, accented_char)
1126             if converted == '':
1127                 # Normalize contents
1128                 contents = '%s{%s}' % (accent, accented_char),
1129             else:
1130                 document.body[i] = '%s%s' % (prefix, converted)
1131                 i += 1
1132                 continue
1133         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1134         document.body[i] = prefix
1135         document.body[i+1:i+1] = ['\\begin_inset ERT',
1136                                   'status collapsed',
1137                                   '',
1138                                   '\\begin_layout %s' % document.default_layout,
1139                                   '',
1140                                   '',
1141                                   '']
1142         i = convert_ertbackslash(document.body, i + 7,
1143                                  '\\%s' % contents,
1144                                  document.default_layout)
1145         document.body[i+1:i+1] = ['\\end_layout',
1146                                   '',
1147                                   '\\end_inset']
1148         i += 3
1149
1150
1151 def revert_accent(document):
1152     inverse_accent_map = {}
1153     for k in accent_map:
1154         inverse_accent_map[accent_map[k]] = k
1155     inverse_special_accent_map = {}
1156     for k in special_accent_map:
1157         inverse_special_accent_map[special_accent_map[k]] = k
1158     inverse_accented_map = {}
1159     for k in accented_map:
1160         inverse_accented_map[accented_map[k]] = k
1161
1162     # Since LyX may insert a line break within a word we must combine all
1163     # words before unicode normalization.
1164     # We do this only if the next line starts with an accent, otherwise we
1165     # would create things like '\begin_inset ERTstatus'.
1166     numberoflines = len(document.body)
1167     for i in range(numberoflines-1):
1168         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1169             continue
1170         if (document.body[i+1][0] in inverse_accent_map):
1171             # the last character of this line and the first of the next line
1172             # form probably a surrogate pair.
1173             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1174                 document.body[i] += document.body[i+1][0]
1175                 document.body[i+1] = document.body[i+1][1:]
1176
1177     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1178     # This is needed to catch all accented characters.
1179     for i in range(numberoflines):
1180         # Unfortunately we have a mixture of unicode strings and plain strings,
1181         # because we never use u'xxx' for string literals, but 'xxx'.
1182         # Therefore we may have to try two times to normalize the data.
1183         try:
1184             document.body[i] = unicodedata.normalize("NFD", document.body[i])
1185         except TypeError:
1186             document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1187
1188     # Replace accented characters with InsetLaTeXAccent
1189     # Do not convert characters that can be represented in the chosen
1190     # encoding.
1191     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1192     lang_re = re.compile(r"^\\lang\s(\S+)")
1193     i = 0
1194     while i < len(document.body):
1195
1196         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1197             # Track the encoding of the current line
1198             result = lang_re.match(document.body[i])
1199             if result:
1200                 language = result.group(1)
1201                 if language == "default":
1202                     encoding_stack[-1] = document.encoding
1203                 else:
1204                     from lyx2lyx_lang import lang
1205                     encoding_stack[-1] = lang[language][3]
1206                 continue
1207             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1208                 encoding_stack.append(encoding_stack[-1])
1209                 continue
1210             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1211                 del encoding_stack[-1]
1212                 continue
1213
1214         for j in range(len(document.body[i])):
1215             # dotless i and dotless j are both in special_accent_map and can
1216             # occur as an accented character, so we need to test that the
1217             # following character is no accent
1218             if (document.body[i][j] in inverse_special_accent_map and
1219                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1220                 accent = document.body[i][j]
1221                 try:
1222                     dummy = accent.encode(encoding_stack[-1])
1223                 except UnicodeEncodeError:
1224                     # Insert the rest of the line as new line
1225                     if j < len(document.body[i]) - 1:
1226                         document.body.insert(i+1, document.body[i][j+1:])
1227                     # Delete the accented character
1228                     if j > 0:
1229                         document.body[i] = document.body[i][:j-1]
1230                     else:
1231                         document.body[i] = u''
1232                     # Finally add the InsetLaTeXAccent
1233                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1234                     break
1235             elif j > 0 and document.body[i][j] in inverse_accent_map:
1236                 accented_char = document.body[i][j-1]
1237                 if accented_char == ' ':
1238                     # Conform to LyX output
1239                     accented_char = ''
1240                 elif accented_char in inverse_accented_map:
1241                     accented_char = inverse_accented_map[accented_char]
1242                 accent = document.body[i][j]
1243                 try:
1244                     dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1245                 except UnicodeEncodeError:
1246                     # Insert the rest of the line as new line
1247                     if j < len(document.body[i]) - 1:
1248                         document.body.insert(i+1, document.body[i][j+1:])
1249                     # Delete the accented characters
1250                     if j > 1:
1251                         document.body[i] = document.body[i][:j-2]
1252                     else:
1253                         document.body[i] = u''
1254                     # Finally add the InsetLaTeXAccent
1255                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1256                     break
1257         i = i + 1
1258
1259     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1260     for i in range(numberoflines):
1261         document.body[i] = unicodedata.normalize("NFC", document.body[i])
1262
1263
1264 def normalize_font_whitespace_259(document):
1265     """ Before format 259 the font changes were ignored if a
1266     whitespace was the first or last character in the sequence, this function
1267     transfers the whitespace outside."""
1268
1269     char_properties = {"\\series": "default",
1270                        "\\emph": "default",
1271                        "\\color": "none",
1272                        "\\shape": "default",
1273                        "\\bar": "default",
1274                        "\\family": "default"}
1275     return normalize_font_whitespace(document, char_properties)
1276
1277 def normalize_font_whitespace_274(document):
1278     """ Before format 259 (sic) the font changes were ignored if a
1279     whitespace was the first or last character in the sequence. This was
1280     corrected for most font properties in format 259, but the language
1281     was forgotten then. This function applies the same conversion done
1282     there (namely, transfers the whitespace outside) for font language
1283     changes, as well."""
1284
1285     char_properties = {"\\lang": "default"}
1286     return normalize_font_whitespace(document, char_properties)
1287
1288 def get_paragraph_language(document, i):
1289     """ Return the language of the paragraph in which line i of the document
1290     body is. If the first thing in the paragraph is a \\lang command, that
1291     is the paragraph's langauge; otherwise, the paragraph's language is the
1292     document's language."""
1293
1294     lines = document.body
1295
1296     first_nonempty_line = \
1297         find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
1298
1299     words = lines[first_nonempty_line].split()
1300
1301     if len(words) > 1 and words[0] == "\\lang":
1302         return words[1]
1303     else:
1304         return document.language
1305
1306 def normalize_font_whitespace(document, char_properties):
1307     """ Before format 259 the font changes were ignored if a
1308     whitespace was the first or last character in the sequence, this function
1309     transfers the whitespace outside. Only a change in one of the properties
1310     in the provided     char_properties is handled by this function."""
1311
1312     if document.backend != "latex":
1313         return
1314
1315     lines = document.body
1316
1317     changes = {}
1318
1319     i = 0
1320     while i < len(lines):
1321         words = lines[i].split()
1322
1323         if len(words) > 0 and words[0] == "\\begin_layout":
1324             # a new paragraph resets all font changes
1325             changes.clear()
1326             # also reset the default language to be the paragraph's language
1327             if "\\lang" in char_properties.keys():
1328                 char_properties["\\lang"] = \
1329                     get_paragraph_language(document, i + 1)
1330
1331         elif len(words) > 1 and words[0] in char_properties.keys():
1332             # we have a font change
1333             if char_properties[words[0]] == words[1]:
1334                 # property gets reset
1335                 if words[0] in changes.keys():
1336                     del changes[words[0]]
1337                 defaultproperty = True
1338             else:
1339                 # property gets set
1340                 changes[words[0]] = words[1]
1341                 defaultproperty = False
1342
1343             # We need to explicitly reset all changed properties if we find
1344             # a space below, because LyX 1.4 would output the space after
1345             # closing the previous change and before starting the new one,
1346             # and closing a font change means to close all properties, not
1347             # just the changed one.
1348
1349             if lines[i-1] and lines[i-1][-1] == " ":
1350                 lines[i-1] = lines[i-1][:-1]
1351                 # a space before the font change
1352                 added_lines = [" "]
1353                 for k in changes.keys():
1354                     # exclude property k because that is already in lines[i]
1355                     if k != words[0]:
1356                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1357                 for k in changes.keys():
1358                     # exclude property k because that must be added below anyway
1359                     if k != words[0]:
1360                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1361                 if defaultproperty:
1362                     # Property is reset in lines[i], so add the new stuff afterwards
1363                     lines[i+1:i+1] = added_lines
1364                 else:
1365                     # Reset property for the space
1366                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1367                     lines[i:i] = added_lines
1368                 i = i + len(added_lines)
1369
1370             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1371                 # a space after the font change
1372                 if (lines[i+1] == " " and lines[i+2]):
1373                     next_words = lines[i+2].split()
1374                     if len(next_words) > 0 and next_words[0] == words[0]:
1375                         # a single blank with a property different from the
1376                         # previous and the next line must not be changed
1377                         i = i + 2
1378                         continue
1379                 lines[i+1] = lines[i+1][1:]
1380                 added_lines = [" "]
1381                 for k in changes.keys():
1382                     # exclude property k because that is already in lines[i]
1383                     if k != words[0]:
1384                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1385                 for k in changes.keys():
1386                     # exclude property k because that must be added below anyway
1387                     if k != words[0]:
1388                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1389                 # Reset property for the space
1390                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1391                 lines[i:i] = added_lines
1392                 i = i + len(added_lines)
1393
1394         i = i + 1
1395
1396
1397 def revert_utf8x(document):
1398     " Set utf8x encoding to utf8. "
1399     i = find_token(document.header, "\\inputencoding", 0)
1400     if i == -1:
1401         document.header.append("\\inputencoding auto")
1402     else:
1403         inputenc = get_value(document.header, "\\inputencoding", i)
1404         if inputenc == "utf8x":
1405             document.header[i] = "\\inputencoding utf8"
1406     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1407
1408
1409 def revert_utf8plain(document):
1410     " Set utf8plain encoding to utf8. "
1411     i = find_token(document.header, "\\inputencoding", 0)
1412     if i == -1:
1413         document.header.append("\\inputencoding auto")
1414     else:
1415         inputenc = get_value(document.header, "\\inputencoding", i)
1416         if inputenc == "utf8-plain":
1417             document.header[i] = "\\inputencoding utf8"
1418     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1419
1420
1421 def revert_beamer_alert(document):
1422     " Revert beamer's \\alert inset back to ERT. "
1423     i = 0
1424     while 1:
1425         i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1426         if i == -1:
1427             return
1428         document.body[i] = "\\begin_inset ERT"
1429         i = i + 1
1430         while 1:
1431             if (document.body[i][:13] == "\\begin_layout"):
1432                 # Insert the \alert command
1433                 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
1434                 break
1435             i = i + 1
1436
1437         i = i + 1
1438
1439
1440 def revert_beamer_structure(document):
1441     " Revert beamer's \\structure inset back to ERT. "
1442     i = 0
1443     while 1:
1444         i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1445         if i == -1:
1446             return
1447         document.body[i] = "\\begin_inset ERT"
1448         i = i + 1
1449         while 1:
1450             if (document.body[i][:13] == "\\begin_layout"):
1451                 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
1452                 break
1453             i = i + 1
1454
1455         i = i + 1
1456
1457
1458 def convert_changes(document):
1459     " Switch output_changes off if tracking_changes is off. "
1460     i = find_token(document.header, '\\tracking_changes', 0)
1461     if i == -1:
1462         document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1463         return
1464     j = find_token(document.header, '\\output_changes', 0)
1465     if j == -1:
1466         document.warning("Malformed lyx document: Missing '\\output_changes'.")
1467         return
1468     tracking_changes = get_value(document.header, "\\tracking_changes", i)
1469     output_changes = get_value(document.header, "\\output_changes", j)
1470     if tracking_changes == "false" and output_changes == "true":
1471         document.header[j] = "\\output_changes false"
1472
1473
1474 def revert_ascii(document):
1475     " Set ascii encoding to auto. "
1476     i = find_token(document.header, "\\inputencoding", 0)
1477     if i == -1:
1478         document.header.append("\\inputencoding auto")
1479     else:
1480         inputenc = get_value(document.header, "\\inputencoding", i)
1481         if inputenc == "ascii":
1482             document.header[i] = "\\inputencoding auto"
1483     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1484
1485
1486 def normalize_language_name(document):
1487     lang = { "brazil": "brazilian",
1488              "portuges": "portuguese"}
1489
1490     if document.language in lang:
1491         document.language = lang[document.language]
1492         i = find_token(document.header, "\\language", 0)
1493         document.header[i] = "\\language %s" % document.language
1494
1495
1496 def revert_language_name(document):
1497     lang = { "brazilian": "brazil",
1498              "portuguese": "portuges"}
1499
1500     if document.language in lang:
1501         document.language = lang[document.language]
1502         i = find_token(document.header, "\\language", 0)
1503         document.header[i] = "\\language %s" % document.language
1504
1505 #
1506 #  \textclass cv -> \textclass simplecv
1507 def convert_cv_textclass(document):
1508     if document.textclass == "cv":
1509         document.textclass = "simplecv"
1510
1511
1512 def revert_cv_textclass(document):
1513     if document.textclass == "simplecv":
1514         document.textclass = "cv"
1515
1516
1517 #
1518 # add scaleBeforeRotation graphics param
1519 def convert_graphics_rotation(document):
1520     " add scaleBeforeRotation graphics parameter. "
1521     i = 0
1522     while 1:
1523         i = find_token(document.body, "\\begin_inset Graphics", i)
1524         if i == -1:
1525             return
1526         j = find_end_of_inset(document.body, i+1)
1527         if j == -1:
1528             # should not happen
1529             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1530         # Seach for rotateAngle and width or height or scale
1531         # If these params are not there, nothing needs to be done.
1532         k = find_token(document.body, "\trotateAngle", i + 1, j)
1533         l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1534         if (k != -1 and l != -1):
1535             document.body.insert(j, 'scaleBeforeRotation')
1536         i = i + 1
1537
1538
1539 #
1540 # remove scaleBeforeRotation graphics param
1541 def revert_graphics_rotation(document):
1542     " remove scaleBeforeRotation graphics parameter. "
1543     i = 0
1544     while 1:
1545         i = find_token(document.body, "\\begin_inset Graphics", i)
1546         if i == -1:
1547             return
1548         j = find_end_of_inset(document.body, i + 1)
1549         if j == -1:
1550             # should not happen
1551             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1552         # If there's a scaleBeforeRotation param, just remove that
1553         k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1554         if k != -1:
1555             del document.body[k]
1556         else:
1557             # if not, and if we have rotateAngle and width or height or scale,
1558             # we have to put the rotateAngle value to special
1559             rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1560             special = get_value(document.body, 'special', i + 1, j)
1561             if rotateAngle != "":
1562                 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1563                 if k == -1:
1564                     break
1565                 if special == "":
1566                     document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
1567                 else:
1568                     l = find_token(document.body, "\tspecial", i + 1, j)
1569                     document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1570                 k = find_token(document.body, "\trotateAngle", i + 1, j)
1571                 if k != -1:
1572                     del document.body[k]
1573         i = i + 1
1574
1575
1576
1577 def convert_tableborder(document):
1578     # The problematic is: LyX double the table cell border as it ignores the "|" character in
1579     # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1580     i = 0
1581     while i < len(document.body):
1582         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1583         k = document.body[i].find("|>{", 0, len(document.body[i]))
1584         # the two tokens have to be in one line
1585         if (h != -1 and k != -1):
1586             # delete the "|"
1587             document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
1588         i = i + 1
1589
1590
1591 def revert_tableborder(document):
1592     i = 0
1593     while i < len(document.body):
1594         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1595         k = document.body[i].find(">{", 0, len(document.body[i]))
1596         # the two tokens have to be in one line
1597         if (h != -1 and k != -1):
1598             # add the "|"
1599             document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
1600         i = i + 1
1601
1602
1603 def revert_armenian(document):
1604
1605     # set inputencoding from armscii8 to auto
1606     if document.inputencoding == "armscii8":
1607         i = find_token(document.header, "\\inputencoding", 0)
1608         if i != -1:
1609             document.header[i] = "\\inputencoding auto"
1610     # check if preamble exists, if not k is set to -1
1611     i = 0
1612     k = -1
1613     while i < len(document.preamble):
1614         if k == -1:
1615             k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1616         if k == -1:
1617             k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1618         i = i + 1
1619     # add the entry \usepackage{armtex} to the document preamble
1620     if document.language == "armenian":
1621         # set the armtex entry as the first preamble line
1622         if k != -1:
1623             document.preamble[0:0] = ["\\usepackage{armtex}"]
1624         # create the preamble when it doesn't exist
1625         else:
1626             document.preamble.append('\\usepackage{armtex}')
1627     # Set document language from armenian to english
1628     if document.language == "armenian":
1629         document.language = "english"
1630         i = find_token(document.header, "\\language", 0)
1631         if i != -1:
1632             document.header[i] = "\\language english"
1633
1634
1635 def revert_CJK(document):
1636     " Set CJK encodings to default and languages chinese, japanese and korean to english. "
1637     encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
1638                  "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
1639     i = find_token(document.header, "\\inputencoding", 0)
1640     if i == -1:
1641         document.header.append("\\inputencoding auto")
1642     else:
1643         inputenc = get_value(document.header, "\\inputencoding", i)
1644         if inputenc in encodings:
1645             document.header[i] = "\\inputencoding default"
1646     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1647
1648     if document.language == "chinese-simplified" or \
1649        document.language == "chinese-traditional" or \
1650        document.language == "japanese" or document.language == "korean":
1651         document.language = "english"
1652         i = find_token(document.header, "\\language", 0)
1653         if i != -1:
1654             document.header[i] = "\\language english"
1655
1656
1657 def revert_preamble_listings_params(document):
1658     " Revert preamble option \listings_params "
1659     i = find_token(document.header, "\\listings_params", 0)
1660     if i != -1:
1661         document.preamble.append('\\usepackage{listings}')
1662         document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
1663         document.header.pop(i);
1664
1665
1666 def revert_listings_inset(document):
1667     r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1668 FROM
1669
1670 \begin_inset
1671 lstparams "language=Delphi"
1672 inline true
1673 status open
1674
1675 \begin_layout Standard
1676 var i = 10;
1677 \end_layout
1678
1679 \end_inset
1680
1681 TO
1682
1683 \begin_inset ERT
1684 status open
1685 \begin_layout Standard
1686
1687
1688 \backslash
1689 lstinline[language=Delphi]{var i = 10;}
1690 \end_layout
1691
1692 \end_inset
1693
1694 There can be an caption inset in this inset
1695
1696 \begin_layout Standard
1697 \begin_inset Caption
1698
1699 \begin_layout Standard
1700 before label
1701 \begin_inset LatexCommand label
1702 name "lst:caption"
1703
1704 \end_inset
1705
1706 after label
1707 \end_layout
1708
1709 \end_inset
1710
1711
1712 \end_layout
1713
1714 '''
1715     i = 0
1716     while True:
1717         i = find_token(document.body, '\\begin_inset listings', i)
1718         if i == -1:
1719             break
1720         else:
1721             if not '\\usepackage{listings}' in document.preamble:
1722                 document.preamble.append('\\usepackage{listings}')
1723         j = find_end_of_inset(document.body, i + 1)
1724         if j == -1:
1725             # this should not happen
1726             break
1727         inline = 'false'
1728         params = ''
1729         status = 'open'
1730         # first three lines
1731         for line in range(i + 1, i + 4):
1732             if document.body[line].startswith('inline'):
1733                 inline = document.body[line].split()[1]
1734             if document.body[line].startswith('lstparams'):
1735                 params = document.body[line].split()[1].strip('"')
1736             if document.body[line].startswith('status'):
1737                 status = document.body[line].split()[1].strip()
1738                 k = line + 1
1739         # caption?
1740         caption = ''
1741         label = ''
1742         cap = find_token(document.body, '\\begin_inset Caption', i)
1743         if cap != -1:
1744             cap_end = find_end_of_inset(document.body, cap + 1)
1745             if cap_end == -1:
1746                 # this should not happen
1747                 break
1748             # label?
1749             lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1750             if lbl != -1:
1751                 lbl_end = find_end_of_inset(document.body, lbl + 1)
1752                 if lbl_end == -1:
1753                     # this should not happen
1754                     break
1755             else:
1756                 lbl = cap_end
1757                 lbl_end = cap_end
1758             for line in document.body[lbl : lbl_end + 1]:
1759                 if line.startswith('name '):
1760                     label = line.split()[1].strip('"')
1761                     break
1762             for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1763                 if not line.startswith('\\'):
1764                     caption += line.strip()
1765             k = cap_end + 1
1766         inlinecode = ''
1767         # looking for the oneline code for lstinline
1768         inlinecode = document.body[find_end_of_layout(document.body,
1769             find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
1770         if len(caption) > 0:
1771             if len(params) == 0:
1772                 params = 'caption={%s}' % caption
1773             else:
1774                 params += ',caption={%s}' % caption
1775         if len(label) > 0:
1776             if len(params) == 0:
1777                 params = 'label={%s}' % label
1778             else:
1779                 params += ',label={%s}' % label
1780         if len(params) > 0:
1781             params = '[%s]' % params
1782             params = params.replace('\\', '\\backslash\n')
1783         if inline == 'true':
1784             document.body[i:(j+1)] = [r'\begin_inset ERT',
1785                                       'status %s' % status,
1786                                       r'\begin_layout %s' % document.default_layout,
1787                                       '',
1788                                       '',
1789                                       r'\backslash',
1790                                       'lstinline%s{%s}' % (params, inlinecode),
1791                                       r'\end_layout',
1792                                       '',
1793                                       r'\end_inset']
1794         else:
1795             document.body[i: j+1] =  [r'\begin_inset ERT',
1796                                       'status %s' % status,
1797                                       '',
1798                                       r'\begin_layout %s' % document.default_layout,
1799                                       '',
1800                                       '',
1801                                       r'\backslash',
1802                                       r'begin{lstlisting}%s' % params,
1803                                       r'\end_layout'
1804                                     ] + document.body[k : j - 1] + \
1805                                      ['',
1806                                       r'\begin_layout %s' % document.default_layout,
1807                                       '',
1808                                       r'\backslash',
1809                                       'end{lstlisting}',
1810                                       r'\end_layout',
1811                                       '',
1812                                       r'\end_inset']
1813
1814
1815 def revert_include_listings(document):
1816     r''' Revert lstinputlisting Include option , translate
1817 \begin_inset Include \lstinputlisting{file}[opt]
1818 preview false
1819
1820 \end_inset
1821
1822 TO
1823
1824 \begin_inset ERT
1825 status open
1826
1827 \begin_layout Standard
1828
1829
1830 \backslash
1831 lstinputlisting{file}[opt]
1832 \end_layout
1833
1834 \end_inset
1835     '''
1836
1837     i = 0
1838     while True:
1839         i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1840         if i == -1:
1841             break
1842         else:
1843             if not '\\usepackage{listings}' in document.preamble:
1844                 document.preamble.append('\\usepackage{listings}')
1845         j = find_end_of_inset(document.body, i + 1)
1846         if j == -1:
1847             # this should not happen
1848             break
1849         # find command line lstinputlisting{file}[options]
1850         cmd, file, option = '', '', ''
1851         if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1852             cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
1853         option = option.replace('\\', '\\backslash\n')
1854         document.body[i : j + 1] = [r'\begin_inset ERT',
1855                                     'status open',
1856                                     '',
1857                                     r'\begin_layout %s' % document.default_layout,
1858                                     '',
1859                                     '',
1860                                     r'\backslash',
1861                                     '%s%s{%s}' % (cmd, option, file),
1862                                     r'\end_layout',
1863                                     '',
1864                                     r'\end_inset']
1865
1866
1867 def revert_ext_font_sizes(document):
1868     if document.backend != "latex": return
1869     if not document.textclass.startswith("ext"): return
1870
1871     fontsize = get_value(document.header, '\\paperfontsize', 0)
1872     if fontsize not in ('10', '11', '12'): return
1873     fontsize += 'pt'
1874
1875     i = find_token(document.header, '\\paperfontsize', 0)
1876     document.header[i] = '\\paperfontsize default'
1877
1878     i = find_token(document.header, '\\options', 0)
1879     if i == -1:
1880         i = find_token(document.header, '\\textclass', 0) + 1
1881         document.header[i:i] = ['\\options %s' % fontsize]
1882     else:
1883         document.header[i] += ',%s' % fontsize
1884
1885
1886 def convert_ext_font_sizes(document):
1887     if document.backend != "latex": return
1888     if not document.textclass.startswith("ext"): return
1889
1890     fontsize = get_value(document.header, '\\paperfontsize', 0)
1891     if fontsize != 'default': return
1892
1893     i = find_token(document.header, '\\options', 0)
1894     if i == -1: return
1895
1896     options = get_value(document.header, '\\options', i)
1897
1898     fontsizes = '10pt', '11pt', '12pt'
1899     for fs in fontsizes:
1900         if options.find(fs) != -1:
1901             break
1902     else: # this else will only be attained if the for cycle had no match
1903         return
1904
1905     options = options.split(',')
1906     for j, opt in enumerate(options):
1907         if opt in fontsizes:
1908             fontsize = opt[:-2]
1909             del options[j]
1910             break
1911     else:
1912         return
1913
1914     k = find_token(document.header, '\\paperfontsize', 0)
1915     document.header[k] = '\\paperfontsize %s' % fontsize
1916
1917     if options:
1918         document.header[i] = '\\options %s' % ','.join(options)
1919     else:
1920         del document.header[i]
1921
1922
1923 def revert_separator_layout(document):
1924     r'''Revert --Separator-- to a lyx note
1925 From
1926
1927 \begin_layout --Separator--
1928 something
1929 \end_layout
1930
1931 to
1932
1933 \begin_layout Standard
1934 \begin_inset Note Note
1935 status open
1936
1937 \begin_layout Standard
1938 Separate Evironment
1939 \end_layout
1940
1941 \end_inset
1942 something
1943
1944 \end_layout
1945
1946     '''
1947
1948     i = 0
1949     while True:
1950         i = find_token(document.body, r'\begin_layout --Separator--', i)
1951         if i == -1:
1952             break
1953         j = find_end_of_layout(document.body, i + 1)
1954         if j == -1:
1955             # this should not happen
1956             break
1957         document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1958                                     r'\begin_inset Note Note',
1959                                     'status open',
1960                                     '',
1961                                     r'\begin_layout %s' % document.default_layout,
1962                                     'Separate Environment',
1963                                     r'\end_layout',
1964                                     '',
1965                                     r'\end_inset'] + \
1966                                     document.body[ i + 1 : j] + \
1967                                     ['',
1968                                     r'\end_layout'
1969                                     ]
1970
1971
1972 def convert_arabic (document):
1973     if document.language == "arabic":
1974         document.language = "arabic_arabtex"
1975         i = find_token(document.header, "\\language", 0)
1976         if i != -1:
1977             document.header[i] = "\\language arabic_arabtex"
1978     i = 0
1979     while i < len(document.body):
1980         h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
1981         if (h != -1):
1982             # change the language name
1983             document.body[i] = '\lang arabic_arabtex'
1984         i = i + 1
1985
1986
1987 def revert_arabic (document):
1988     if document.language == "arabic_arabtex":
1989         document.language = "arabic"
1990         i = find_token(document.header, "\\language", 0)
1991         if i != -1:
1992             document.header[i] = "\\language arabic"
1993     i = 0
1994     while i < len(document.body):
1995         h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
1996         if (h != -1):
1997             # change the language name
1998             document.body[i] = '\lang arabic'
1999         i = i + 1
2000
2001
2002 ##
2003 # Conversion hub
2004 #
2005
2006 supported_versions = ["1.5.0","1.5"]
2007 convert = [[246, []],
2008            [247, [convert_font_settings]],
2009            [248, []],
2010            [249, [convert_utf8]],
2011            [250, []],
2012            [251, []],
2013            [252, [convert_commandparams, convert_bibitem]],
2014            [253, []],
2015            [254, [convert_esint]],
2016            [255, []],
2017            [256, []],
2018            [257, [convert_caption]],
2019            [258, [convert_lyxline]],
2020            [259, [convert_accent, normalize_font_whitespace_259]],
2021            [260, []],
2022            [261, [convert_changes]],
2023            [262, []],
2024            [263, [normalize_language_name]],
2025            [264, [convert_cv_textclass]],
2026            [265, [convert_tableborder]],
2027            [266, []],
2028            [267, []],
2029            [268, []],
2030            [269, []],
2031            [270, []],
2032            [271, [convert_ext_font_sizes]],
2033            [272, []],
2034            [273, []],
2035            [274, [normalize_font_whitespace_274]],
2036            [275, [convert_graphics_rotation]],
2037            [276, [convert_arabic]]
2038           ]
2039
2040 revert =  [
2041            [275, [revert_arabic]],
2042            [274, [revert_graphics_rotation]],
2043            [273, []],
2044            [272, [revert_separator_layout]],
2045            [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2046            [270, [revert_ext_font_sizes]],
2047            [269, [revert_beamer_alert, revert_beamer_structure]],
2048            [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2049            [267, [revert_CJK]],
2050            [266, [revert_utf8plain]],
2051            [265, [revert_armenian]],
2052            [264, [revert_tableborder]],
2053            [263, [revert_cv_textclass]],
2054            [262, [revert_language_name]],
2055            [261, [revert_ascii]],
2056            [260, []],
2057            [259, [revert_utf8x]],
2058            [258, []],
2059            [257, []],
2060            [256, [revert_caption]],
2061            [255, [revert_encodings]],
2062            [254, [revert_clearpage, revert_cleardoublepage]],
2063            [253, [revert_esint]],
2064            [252, [revert_nomenclature, revert_printnomenclature]],
2065            [251, [revert_commandparams]],
2066            [250, [revert_cs_label]],
2067            [249, []],
2068            [248, [revert_accent, revert_utf8, revert_unicode]],
2069            [247, [revert_booktabs]],
2070            [246, [revert_font_settings]],
2071            [245, [revert_framed]]]
2072
2073
2074 if __name__ == "__main__":
2075     pass