Tab and white space clean up.
[phpmyadmin-themes.git] / libraries / import / csv.php
blobdb8d09cc1a12989cb3a0123a1993cfea258b0992
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * CSV import plugin for phpMyAdmin
6 * @todo add an option for handling NULL values
7 * @package phpMyAdmin-Import
8 */
9 if (! defined('PHPMYADMIN')) {
10 exit;
13 $analyze = false;
15 if ($plugin_param !== 'table') {
16 $analyze = true;
19 if (isset($plugin_list)) {
20 $plugin_list['csv'] = array(
21 'text' => __('CSV'),
22 'extension' => 'csv',
23 'options' => array(
24 array('type' => 'begin_group', 'name' => 'general_opts'),
25 array('type' => 'bool', 'name' => 'replace', 'text' => __('Replace table data with file')),
26 array('type' => 'bool', 'name' => 'ignore', 'text' => __('Do not abort on INSERT error')),
27 array('type' => 'text', 'name' => 'terminated', 'text' => __('Columns separated with:'), 'size' => 2, 'len' => 2),
28 array('type' => 'text', 'name' => 'enclosed', 'text' => __('Columns enclosed with:'), 'size' => 2, 'len' => 2),
29 array('type' => 'text', 'name' => 'escaped', 'text' => __('Columns escaped with:'), 'size' => 2, 'len' => 2),
30 array('type' => 'text', 'name' => 'new_line', 'text' => __('Lines terminated with:'), 'size' => 2),
32 'options_text' => __('Options'),
35 if ($plugin_param !== 'table') {
36 $plugin_list['csv']['options'][] =
37 array('type' => 'bool', 'name' => 'col_names', 'text' => __('The first line of the file contains the table column names <i>(if this is unchecked, the first line will become part of the data)</i>'));
38 } else {
39 $hint = new PMA_Message(__('If the data in each row of the file is not in the same order as in the database, list the corresponding column names here. Column names must be separated by commas and not enclosed in quotations.'));
40 $plugin_list['csv']['options'][] =
41 array('type' => 'text', 'name' => 'columns', 'text' => __('Column names: ' . PMA_showHint($hint)));
43 $plugin_list['csv']['options'][] = array('type' => 'end_group');
45 /* We do not define function when plugin is just queried for information above */
46 return;
49 $replacements = array(
50 '\\n' => "\n",
51 '\\t' => "\t",
52 '\\r' => "\r",
54 $csv_terminated = strtr($csv_terminated, $replacements);
55 $csv_enclosed = strtr($csv_enclosed, $replacements);
56 $csv_escaped = strtr($csv_escaped, $replacements);
57 $csv_new_line = strtr($csv_new_line, $replacements);
59 if (strlen($csv_terminated) != 1) {
60 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
61 $message->addParam(__('Columns terminated by'), false);
62 $error = TRUE;
63 // The default dialog of MS Excel when generating a CSV produces a
64 // semi-colon-separated file with no chance of specifying the
65 // enclosing character. Thus, users who want to import this file
66 // tend to remove the enclosing character on the Import dialog.
67 // I could not find a test case where having no enclosing characters
68 // confuses this script.
69 // But the parser won't work correctly with strings so we allow just
70 // one character.
71 } elseif (strlen($csv_enclosed) > 1) {
72 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
73 $message->addParam(__('Columns enclosed by'), false);
74 $error = TRUE;
75 } elseif (strlen($csv_escaped) != 1) {
76 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
77 $message->addParam(__('Columns escaped by'), false);
78 $error = TRUE;
79 } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
80 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
81 $message->addParam(__('Lines terminated by'), false);
82 $error = TRUE;
85 $buffer = '';
86 $required_fields = 0;
88 if (!$analyze) {
89 if (isset($csv_replace)) {
90 $sql_template = 'REPLACE';
91 } else {
92 $sql_template = 'INSERT';
93 if (isset($csv_ignore)) {
94 $sql_template .= ' IGNORE';
97 $sql_template .= ' INTO ' . PMA_backquote($table);
99 $tmp_fields = PMA_DBI_get_fields($db, $table);
101 if (empty($csv_columns)) {
102 $fields = $tmp_fields;
103 } else {
104 $sql_template .= ' (';
105 $fields = array();
106 $tmp = preg_split('/,( ?)/', $csv_columns);
107 foreach ($tmp as $key => $val) {
108 if (count($fields) > 0) {
109 $sql_template .= ', ';
111 /* Trim also `, if user already included backquoted fields */
112 $val = trim($val, " \t\r\n\0\x0B`");
113 $found = FALSE;
114 foreach ($tmp_fields as $id => $field) {
115 if ($field['Field'] == $val) {
116 $found = TRUE;
117 break;
120 if (!$found) {
121 $message = PMA_Message::error(__('Invalid column (%s) specified! Ensure that columns names are spelled correctly, separated by commas, and not enclosed in quotes.' ));
122 $message->addParam($val);
123 $error = TRUE;
124 break;
126 $fields[] = $field;
127 $sql_template .= PMA_backquote($val);
129 $sql_template .= ') ';
132 $required_fields = count($fields);
134 $sql_template .= ' VALUES (';
137 // Defaults for parser
138 $i = 0;
139 $len = 0;
140 $line = 1;
141 $lasti = -1;
142 $values = array();
143 $csv_finish = FALSE;
145 $tempRow = array();
146 $rows = array();
147 $col_names = array();
148 $tables = array();
150 $col_count = 0;
151 $max_cols = 0;
153 while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
154 $data = PMA_importGetNextChunk();
155 if ($data === FALSE) {
156 // subtract data we didn't handle yet and stop processing
157 $offset -= strlen($buffer);
158 break;
159 } elseif ($data === TRUE) {
160 // Handle rest of buffer
161 } else {
162 // Append new data to buffer
163 $buffer .= $data;
164 unset($data);
165 // Do not parse string when we're not at the end and don't have new line inside
166 if (($csv_new_line == 'auto' && strpos($buffer, "\r") === FALSE && strpos($buffer, "\n") === FALSE)
167 || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === FALSE)) {
168 continue;
172 // Current length of our buffer
173 $len = strlen($buffer);
174 // Currently parsed char
175 $ch = $buffer[$i];
176 while ($i < $len) {
177 // Deadlock protection
178 if ($lasti == $i && $lastlen == $len) {
179 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
180 $message->addParam($line);
181 $error = TRUE;
182 break;
184 $lasti = $i;
185 $lastlen = $len;
187 // This can happen with auto EOL and \r at the end of buffer
188 if (!$csv_finish) {
189 // Grab empty field
190 if ($ch == $csv_terminated) {
191 if ($i == $len - 1) {
192 break;
194 $values[] = '';
195 $i++;
196 $ch = $buffer[$i];
197 continue;
200 // Grab one field
201 $fallbacki = $i;
202 if ($ch == $csv_enclosed) {
203 if ($i == $len - 1) {
204 break;
206 $need_end = TRUE;
207 $i++;
208 $ch = $buffer[$i];
209 } else {
210 $need_end = FALSE;
212 $fail = FALSE;
213 $value = '';
214 while (($need_end && $ch != $csv_enclosed)
215 || (!$need_end && !($ch == $csv_terminated
216 || $ch == $csv_new_line || ($csv_new_line == 'auto'
217 && ($ch == "\r" || $ch == "\n"))))) {
218 if ($ch == $csv_escaped) {
219 if ($i == $len - 1) {
220 $fail = TRUE;
221 break;
223 $i++;
224 $ch = $buffer[$i];
226 $value .= $ch;
227 if ($i == $len - 1) {
228 if (!$finished) {
229 $fail = TRUE;
231 break;
233 $i++;
234 $ch = $buffer[$i];
237 // unquoted NULL string
238 if (false === $need_end && $value === 'NULL') {
239 $value = null;
242 if ($fail) {
243 $i = $fallbacki;
244 $ch = $buffer[$i];
245 break;
247 // Need to strip trailing enclosing char?
248 if ($need_end && $ch == $csv_enclosed) {
249 if ($finished && $i == $len - 1) {
250 $ch = NULL;
251 } elseif ($i == $len - 1) {
252 $i = $fallbacki;
253 $ch = $buffer[$i];
254 break;
255 } else {
256 $i++;
257 $ch = $buffer[$i];
260 // Are we at the end?
261 if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
262 $csv_finish = TRUE;
264 // Go to next char
265 if ($ch == $csv_terminated) {
266 if ($i == $len - 1) {
267 $i = $fallbacki;
268 $ch = $buffer[$i];
269 break;
271 $i++;
272 $ch = $buffer[$i];
274 // If everything went okay, store value
275 $values[] = $value;
278 // End of line
279 if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
280 if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
281 if ($i >= ($len - 2) && !$finished) {
282 break; // We need more data to decide new line
284 if ($buffer[$i + 1] == "\n") {
285 $i++;
288 // We didn't parse value till the end of line, so there was empty one
289 if (!$csv_finish) {
290 $values[] = '';
293 if ($analyze) {
294 foreach ($values as $ley => $val) {
295 $tempRow[] = $val;
296 ++$col_count;
299 if ($col_count > $max_cols) {
300 $max_cols = $col_count;
302 $col_count = 0;
304 $rows[] = $tempRow;
305 $tempRow = array();
306 } else {
307 // Do we have correct count of values?
308 if (count($values) != $required_fields) {
310 // Hack for excel
311 if ($values[count($values) - 1] == ';') {
312 unset($values[count($values) - 1]);
313 } else {
314 $message = PMA_Message::error(__('Invalid column count in CSV input on line %d.'));
315 $message->addParam($line);
316 $error = TRUE;
317 break;
321 $first = TRUE;
322 $sql = $sql_template;
323 foreach ($values as $key => $val) {
324 if (!$first) {
325 $sql .= ', ';
327 if ($val === null) {
328 $sql .= 'NULL';
329 } else {
330 $sql .= '\'' . addslashes($val) . '\'';
333 $first = FALSE;
335 $sql .= ')';
338 * @todo maybe we could add original line to verbose SQL in comment
340 PMA_importRunQuery($sql, $sql);
343 $line++;
344 $csv_finish = FALSE;
345 $values = array();
346 $buffer = substr($buffer, $i + 1);
347 $len = strlen($buffer);
348 $i = 0;
349 $lasti = -1;
350 $ch = $buffer[0];
352 } // End of parser loop
353 } // End of import loop
355 if ($analyze) {
356 /* Fill out all rows */
357 $num_rows = count($rows);
358 for ($i = 0; $i < $num_rows; ++$i) {
359 for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
360 $rows[$i][] = 'NULL';
364 if (isset($_REQUEST['csv_col_names'])) {
365 $col_names = array_splice($rows, 0, 1);
366 $col_names = $col_names[0];
369 if ((isset($col_names) && count($col_names) != $max_cols) || !isset($col_names)) {
370 // Fill out column names
371 for ($i = 0; $i < $max_cols; ++$i) {
372 $col_names[] = 'COL '.($i+1);
376 if (strlen($db)) {
377 $result = PMA_DBI_fetch_result('SHOW TABLES');
378 $tbl_name = 'TABLE '.(count($result) + 1);
379 } else {
380 $tbl_name = 'TBL_NAME';
383 $tables[] = array($tbl_name, $col_names, $rows);
385 /* Obtain the best-fit MySQL types for each column */
386 $analyses = array();
387 $analyses[] = PMA_analyzeTable($tables[0]);
390 * string $db_name (no backquotes)
392 * array $table = array(table_name, array() column_names, array()() rows)
393 * array $tables = array of "$table"s
395 * array $analysis = array(array() column_types, array() column_sizes)
396 * array $analyses = array of "$analysis"s
398 * array $create = array of SQL strings
400 * array $options = an associative array of options
403 /* Set database name to the currently selected one, if applicable */
404 if (strlen($db)) {
405 $db_name = $db;
406 $options = array('create_db' => false);
407 } else {
408 $db_name = 'CSV_DB';
409 $options = NULL;
412 /* Non-applicable parameters */
413 $create = NULL;
415 /* Created and execute necessary SQL statements from data */
416 PMA_buildSQL($db_name, $tables, $analyses, $create, $options);
418 unset($tables);
419 unset($analyses);
422 // Commit any possible data in buffers
423 PMA_importRunQuery();
425 if (count($values) != 0 && !$error) {
426 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
427 $message->addParam($line);
428 $error = TRUE;