Update file to the last version discussed on bugzilla and on the devel list
[lyx.git] / lib / scripts / csv2lyx.py
blob2e8bbbb1d8b23cbeca3f2a533e0478b77681b6cc
1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # file csv2lyx.py
5 # This file is part of LyX, the document processor.
6 # Licence details can be found in the file COPYING.
8 # author Hartmut Haase
9 # author Uwe Stöhr
10 # author José Matos
11 # Full author contact details are available in file CREDITS
13 # This script reads a csv-table (file name.csv) and converts it into
14 # a LyX-table for versions 1.5.0 and higher (LyX table format 276).
15 # It uses Python's csv module for parsing.
16 # The original csv2lyx was written by Antonio Gulino <antonio.gulino@tin.it>
17 # in Perl for LyX 1.x and modified for LyX table format 276 by the author.
19 import csv, unicodedata
20 import os, sys
21 import optparse
def error(message):
    """Report a fatal problem and terminate the script.

    Writes *message* followed by a newline to standard error and then
    ends the process with exit status 1.
    """
    text = message + '\n'
    sys.stderr.write(text)
    raise SystemExit(1)
# Templates for the generated LyX document (LyX file format 276).
# Backslashes are doubled throughout so that no line relies on Python
# passing an unknown escape sequence through unchanged.

# Everything written before the table cells.  The two %d placeholders in the
# <lyxtabular> tag receive the number of rows and the number of columns.
header = """#csv2lyx created this file
\\lyxformat 276
\\begin_document
\\begin_header
\\textclass article
\\inputencoding auto
\\font_roman default
\\font_sans default
\\font_typewriter default
\\font_default_family default
\\font_sc false
\\font_osf false
\\font_sf_scale 100
\\font_tt_scale 100
\\graphics default
\\paperfontsize default
\\papersize default
\\use_geometry false
\\use_amsmath 1
\\use_esint 0
\\cite_engine basic
\\use_bibtopic false
\\paperorientation portrait
\\secnumdepth 3
\\tocdepth 3
\\paragraph_separation indent
\\defskip medskip
\\papercolumns 1
\\papersides 1
\\paperpagestyle default
\\tracking_changes false
\\output_changes false
\\end_header

\\begin_body

\\begin_layout Standard
\\align left
\\begin_inset Tabular
<lyxtabular version="3" rows="%d" columns="%d">
<features>
"""

# A single table cell.  The %s placeholder receives the cell text; it is
# filled in with ``cell % text`` when the table rows are written.
cell = """<cell alignment="left" valignment="top" usebox="none">
\\begin_inset Text

\\begin_layout Standard
%s
\\end_layout

\\end_inset
</cell>"""

# Everything written after the last table row.
footer = """</lyxtabular>

\\end_inset


\\end_layout

\\end_body
\\end_document
"""
# processing command line options
# delegate this to standard module optparse
#
# After this section:
#   parser  - the configured optparse.OptionParser
#   options - parsed option values (options.excel, options.column_sep)
#   args    - list of positional arguments (csvfile and optional output file)
parser = optparse.OptionParser(
    usage="Usage: csv2lyx [options] csvfile [file.lyx]",
    description="""This script creates a LyX document containing a table created from a
comma-separated-value (CSV) file. The resulting LyX file can be opened
with LyX 1.5.0 or any later version.
If no options are given csv2lyx will try to infer the CSV type of the csvfile.""")

# Empty strings mean "not given on the command line".
parser.set_defaults(excel='', column_sep='')
parser.add_option("-e", "--excel", metavar="CHAR",
                  help="""CHAR corresponds to a CSV type:
'e': Excel-generated CSV file
't': Excel-generated TAB-delimited CSV file""")
parser.add_option("-s", "--separator", dest="column_sep",
                  help="""column separator
't' means Tab""")

# An option group without options: used only to append remarks to the help.
group = optparse.OptionGroup(parser, "Remarks", """If your CSV file contains special characters (e. g. umlauts,
accented letters, etc.) make sure it is coded in UTF-8 (unicode).
Else LyX will loose some cell contents. If your CSV file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""")
parser.add_option_group(group)

(options, args) = parser.parse_args()
# validate input
# Exactly one or two positional arguments are accepted: the CSV file and,
# optionally, the LyX file to create.
if len(args) not in (1, 2):
    parser.print_help()
    sys.exit(1)
infile = args[0]

if not os.path.exists(infile):
    error('File "%s" not found.' % infile)

# Map the value of --excel onto the name of a csv dialect
# (None: no dialect was requested).
dialects = {'' : None, 'e' : 'excel', 't' : 'excel-tab'}
if options.excel not in dialects:
    parser.print_help()
    sys.exit(1)
dialect = dialects[options.excel]

# Set Tab, if necessary
if options.column_sep == 't':
    options.column_sep = "\t"

# Open the output last: opening with 'w' truncates an existing file, which
# must not happen when the call is rejected by one of the checks above.
# Without a second argument the document goes to standard output.
if len(args) == 2:
    fout = open(args[1], 'w')
else:
    fout = sys.stdout
# Build the CSV reader and load the whole table into memory.
#
# After this section:
#   rows     - list of records, each a list of cell strings
#   num_cols - number of cells in the longest record (at least 1)
#   num_rows - number of records
#
# NOTE: the file is opened in binary mode, which is what the csv module of
# Python 2 expects.
csv_file = open(infile, "rb")
try:
    if options.column_sep or dialect:
        # Hand csv.reader only the settings the user really supplied: an
        # empty delimiter string is rejected by the csv module, so "-e"
        # without "-s" must not pass a delimiter at all.
        reader_args = {}
        if dialect:
            reader_args["dialect"] = dialect
        if options.column_sep:
            reader_args["delimiter"] = options.column_sep
        reader = csv.reader(csv_file, **reader_args)
    else:
        # when no special column separator is given, try to detect it:
        sample = csv_file.read()
        csv_file.seek(0)  # the reader has to start at the beginning again
        try:
            dialect = csv.Sniffer().sniff(sample)
            reader = csv.reader(csv_file, dialect=dialect)
        except csv.Error:
            # Detection failed: fall back to plain comma-separated values.
            reader = csv.reader(csv_file, delimiter=',')

    # read input
    num_cols = 1 # max columns
    rows = []

    for row in reader:
        num_cols = max(num_cols, len(row))
        rows.append(row)
finally:
    csv_file.close()

# Count the parsed records, not the physical lines read: a quoted field may
# span several lines, in which case reader.line_num exceeds len(rows).
num_rows = len(rows)
# create a LyX file
#####################
# write first part
####################
# The row count is taken from the records that are actually written below,
# so the <lyxtabular> tag always matches the table body.
fout.write(header % (len(rows), num_cols))

#####################
# write table
####################
# One <column> declaration per table column.
for i in range(num_cols):
    fout.write('<column alignment="left" valignment="top" width="0pt">\n')

for csv_row in rows:
    row = ['<row>']

    ############################
    # write contents of one line
    ############################
    for value in csv_row:
        # A backslash starts a command in a LyX file; literal backslashes
        # in the cell text are therefore written as "\backslash" followed
        # by a line break.
        row.append(cell % value.replace('\\', '\\backslash\n'))

    # If row has less columns than num_cols fill with blank entries
    for i in range(len(csv_row), num_cols):
        row.append(cell % " ")

    fout.write("\n".join(row) + '\n</row>\n')

#####################
# write last part
####################
fout.write(footer)
# close the LyX file
fout.close()