lib/scripts/csv2lyx.py

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # file csv2lyx.py
   5 # This file is part of LyX, the document processor.
   6 # Licence details can be found in the file COPYING.
   7
   8 # author Hartmut Haase
   9 # author Uwe Stöhr
  10 # author José Matos
  11 # Full author contact details are available in file CREDITS
  12
  13 # This script reads a csv-table (file name.csv) and converts it into
  14 # a LyX-table for versions 1.5.0 and higher (LyX table format 276).
  15 # It uses Python's csv module for parsing.
# The original csv2lyx was written by Antonio Gulino <antonio.gulino@tin.it>
  17 # in Perl for LyX 1.x and modified for LyX table format 276 by the author.
  18 #
  19 import csv, unicodedata
  20 import os, sys
  21 import optparse
  22
  23 def error(message):
  24     sys.stderr.write(message + '\n')
  25     sys.exit(1)
  26
  27 header = """#csv2lyx created this file
  28 \lyxformat 276
  29 \\begin_document
  30 \\begin_header
  31 \\textclass article
  32 \\inputencoding auto
  33 \\font_roman default
  34 \\font_sans default
  35 \\font_typewriter default
  36 \\font_default_family default
  37 \\font_sc false
  38 \\font_osf false
  39 \\font_sf_scale 100
  40 \\font_tt_scale 100
  41 \\graphics default
  42 \\paperfontsize default
  43 \\papersize default
  44 \\use_geometry false
  45 \\use_amsmath 1
  46 \\use_esint 0
  47 \\cite_engine basic
  48 \\use_bibtopic false
  49 \\paperorientation portrait
  50 \\secnumdepth 3
  51 \\tocdepth 3
  52 \\paragraph_separation indent
  53 \\defskip medskip
  54 \\papercolumns 1
  55 \\papersides 1
  56 \\paperpagestyle default
  57 \\tracking_changes false
  58 \\output_changes false
  59 \\end_header
  60
  61 \\begin_body
  62
  63 \\begin_layout Standard
  64 \\align left
  65 \\begin_inset Tabular
  66 <lyxtabular version="3" rows="%d" columns="%d">
  67 <features>
  68 """
  69
  70 cell = """<cell alignment="left" valignment="top" usebox="none">
  71 \\begin_inset Text
  72
  73 \\begin_layout Standard
  74 %s
  75 \\end_layout
  76
  77 \\end_inset
  78 </cell>"""
  79
  80 footer = """</lyxtabular>
  81
  82 \\end_inset
  83
  84
  85 \\end_layout
  86
  87 \\end_body
  88 \\end_document
  89 """
  90
  91 # processing command line options
  92 # delegate this to standard module optparse
  93 args = {}
  94 args["usage"] = "Usage: csv2lyx [options] csvfile [file.lyx]"
  95
  96 args["description"] = """This script creates a LyX document containing a table created from a
  97 comma-separated-value (CSV) file. The resulting LyX file can be opened
  98 with LyX 1.5.0 or any later version.
  99 If no options are given csv2lyx will try to infer the CSV type of the csvfile,
 100 """
 101 parser = optparse.OptionParser(**args)
 102
 103 parser.set_defaults(excel='', column_sep='')
 104 parser.add_option("-e", "--excel", metavar="CHAR",
 105                   help="""CHAR corresponds to a CSV type:
 106                        'e': Excel-generated CSV file
 107                        't': Excel-generated TAB-delimited CSV file""")
 108 parser.add_option("-s", "--separator", dest="column_sep",
 109                   help= """column separator
 110                                        't' means Tab""")
 111
 112 group = optparse.OptionGroup(parser, "Remarks", """If your CSV file contains special characters (e. g. umlauts,
 113    accented letters, etc.) make sure it is coded in UTF-8 (unicode).
 114    Else LyX will loose some cell contents. If your CSV file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""")
 115 parser.add_option_group(group)
 116
 117 (options, args) = parser.parse_args()
 118
 119 # validate input
 120 if len(args) == 1:
 121     infile = args[0]
 122     fout = sys.stdout
 123 elif len(args) ==2:
 124     infile = args[0]
 125     fout = open(args[1], 'w')
 126 else:
 127     parser.print_help()
 128     sys.exit(1)
 129
 130 if not os.path.exists(infile):
 131         error('File "%s" not found.' % infile)
 132
 133 dialects = {'' : None, 'e' : 'excel', 't' : 'excel-tab'}
 134 if options.excel not in dialects:
 135     parser.print_help()
 136     sys.exit(1)
 137 dialect= dialects[options.excel]
 138
 139 # Set Tab, if necessary
 140 if options.column_sep == 't':
 141         options.column_sep = "\t"
 142
 143 # when no special column separator is given, try to detect it:
 144 if options.column_sep or dialect :
 145     reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=options.column_sep)
 146 else:
 147     guesser = csv.Sniffer()
 148     input_file = "".join(open(infile,'rb').readlines())
 149     try:
 150         dialect = guesser.sniff(input_file)
 151         reader = csv.reader(open(infile, "rb"), dialect= dialect)
 152     except:
 153         reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=',')
 154
 155 # read input
 156 num_cols = 1 # max columns
 157 rows = []
 158
 159 for row in reader:
 160     num_cols = max(num_cols, len(row))
 161     rows.append(row)
 162
 163 num_rows = reader.line_num # number of lines
 164
 165 # create a LyX file
 166 #####################
 167 # write first part
 168 ####################
 169 fout.write(header % (num_rows, num_cols))
 170
 171 #####################
 172 # write table
 173 ####################
 174 for i in range(num_cols):
 175         fout.write('<column alignment="left" valignment="top" width="0pt">\n')
 176
 177 for j in range(num_rows):
 178     row = ['<row>']
 179
 180     ############################
 181     # write contents of one line
 182     ############################
 183     for i in range(len(rows[j])):
 184         row.append( cell % rows[j][i])
 185
 186     # If row has less columns than num_cols fill with blank entries
 187     for i in range(len(rows[j]), num_cols):
 188         row.append(cell % " ")
 189
 190     fout.write("\n".join(row) + '\n</row>\n')
 191
 192 #####################
 193 # write last part
 194 ####################
 195 fout.write(footer)
 196 # close the LyX file
 197 fout.close()