Translated using Weblate.
[phpmyadmin.git] / libraries / import / csv.php
blob79e1cd30d24ebc5b37bcf22fc5bedf111e1f7ade
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * CSV import plugin for phpMyAdmin
6 * @todo add an option for handling NULL values
7 * @package PhpMyAdmin-Import
8 * @subpackage CSV
9 */
10 if (! defined('PHPMYADMIN')) {
11 exit;
14 $analyze = false;
16 if ($plugin_param !== 'table') {
17 $analyze = true;
20 if (isset($plugin_list)) {
21 $plugin_list['csv'] = array(
22 'text' => __('CSV'),
23 'extension' => 'csv',
24 'options' => array(
25 array('type' => 'begin_group', 'name' => 'general_opts'),
26 array('type' => 'bool', 'name' => 'replace', 'text' => __('Replace table data with file')),
27 array('type' => 'bool', 'name' => 'ignore', 'text' => __('Do not abort on INSERT error')),
28 array('type' => 'text', 'name' => 'terminated', 'text' => __('Columns separated with:'), 'size' => 2, 'len' => 2),
29 array('type' => 'text', 'name' => 'enclosed', 'text' => __('Columns enclosed with:'), 'size' => 2, 'len' => 2),
30 array('type' => 'text', 'name' => 'escaped', 'text' => __('Columns escaped with:'), 'size' => 2, 'len' => 2),
31 array('type' => 'text', 'name' => 'new_line', 'text' => __('Lines terminated with:'), 'size' => 2),
33 'options_text' => __('Options'),
36 if ($plugin_param !== 'table') {
37 $plugin_list['csv']['options'][]
38 = array('type' => 'bool', 'name' => 'col_names', 'text' => __('The first line of the file contains the table column names <i>(if this is unchecked, the first line will become part of the data)</i>'));
39 } else {
40 $hint = new PMA_Message(__('If the data in each row of the file is not in the same order as in the database, list the corresponding column names here. Column names must be separated by commas and not enclosed in quotations.'));
41 $plugin_list['csv']['options'][]
42 = array('type' => 'text', 'name' => 'columns', 'text' => __('Column names: ') . PMA_showHint($hint));
44 $plugin_list['csv']['options'][] = array('type' => 'end_group');
46 /* We do not define function when plugin is just queried for information above */
47 return;
50 $replacements = array(
51 '\\n' => "\n",
52 '\\t' => "\t",
53 '\\r' => "\r",
55 $csv_terminated = strtr($csv_terminated, $replacements);
56 $csv_enclosed = strtr($csv_enclosed, $replacements);
57 $csv_escaped = strtr($csv_escaped, $replacements);
58 $csv_new_line = strtr($csv_new_line, $replacements);
60 $param_error = false;
61 if (strlen($csv_terminated) != 1) {
62 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
63 $message->addParam(__('Columns terminated by'), false);
64 $error = true;
65 $param_error = true;
66 // The default dialog of MS Excel when generating a CSV produces a
67 // semi-colon-separated file with no chance of specifying the
68 // enclosing character. Thus, users who want to import this file
69 // tend to remove the enclosing character on the Import dialog.
70 // I could not find a test case where having no enclosing characters
71 // confuses this script.
72 // But the parser won't work correctly with strings so we allow just
73 // one character.
74 } elseif (strlen($csv_enclosed) > 1) {
75 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
76 $message->addParam(__('Columns enclosed by'), false);
77 $error = true;
78 $param_error = true;
79 } elseif (strlen($csv_escaped) != 1) {
80 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
81 $message->addParam(__('Columns escaped by'), false);
82 $error = true;
83 $param_error = true;
84 } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
85 $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
86 $message->addParam(__('Lines terminated by'), false);
87 $error = true;
88 $param_error = true;
91 // If there is an error in the parameters entered, indicate that immediately.
92 if ($param_error) {
93 PMA_mysqlDie($message->getMessage(), '', '', $err_url);
96 $buffer = '';
97 $required_fields = 0;
99 if (!$analyze) {
100 if (isset($csv_replace)) {
101 $sql_template = 'REPLACE';
102 } else {
103 $sql_template = 'INSERT';
104 if (isset($csv_ignore)) {
105 $sql_template .= ' IGNORE';
108 $sql_template .= ' INTO ' . PMA_backquote($table);
110 $tmp_fields = PMA_DBI_get_columns($db, $table);
112 if (empty($csv_columns)) {
113 $fields = $tmp_fields;
114 } else {
115 $sql_template .= ' (';
116 $fields = array();
117 $tmp = preg_split('/,( ?)/', $csv_columns);
118 foreach ($tmp as $key => $val) {
119 if (count($fields) > 0) {
120 $sql_template .= ', ';
122 /* Trim also `, if user already included backquoted fields */
123 $val = trim($val, " \t\r\n\0\x0B`");
124 $found = false;
125 foreach ($tmp_fields as $id => $field) {
126 if ($field['Field'] == $val) {
127 $found = true;
128 break;
131 if (!$found) {
132 $message = PMA_Message::error(__('Invalid column (%s) specified! Ensure that columns names are spelled correctly, separated by commas, and not enclosed in quotes.'));
133 $message->addParam($val);
134 $error = true;
135 break;
137 $fields[] = $field;
138 $sql_template .= PMA_backquote($val);
140 $sql_template .= ') ';
143 $required_fields = count($fields);
145 $sql_template .= ' VALUES (';
148 // Defaults for parser
149 $i = 0;
150 $len = 0;
151 $line = 1;
152 $lasti = -1;
153 $values = array();
154 $csv_finish = false;
156 $tempRow = array();
157 $rows = array();
158 $col_names = array();
159 $tables = array();
161 $col_count = 0;
162 $max_cols = 0;
164 while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
165 $data = PMA_importGetNextChunk();
166 if ($data === false) {
167 // subtract data we didn't handle yet and stop processing
168 $offset -= strlen($buffer);
169 break;
170 } elseif ($data === true) {
171 // Handle rest of buffer
172 } else {
173 // Append new data to buffer
174 $buffer .= $data;
175 unset($data);
176 // Do not parse string when we're not at the end and don't have new line inside
177 if (($csv_new_line == 'auto' && strpos($buffer, "\r") === false && strpos($buffer, "\n") === false)
178 || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === false)
180 continue;
184 // Current length of our buffer
185 $len = strlen($buffer);
186 // Currently parsed char
187 $ch = $buffer[$i];
188 while ($i < $len) {
189 // Deadlock protection
190 if ($lasti == $i && $lastlen == $len) {
191 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
192 $message->addParam($line);
193 $error = true;
194 break;
196 $lasti = $i;
197 $lastlen = $len;
199 // This can happen with auto EOL and \r at the end of buffer
200 if (!$csv_finish) {
201 // Grab empty field
202 if ($ch == $csv_terminated) {
203 if ($i == $len - 1) {
204 break;
206 $values[] = '';
207 $i++;
208 $ch = $buffer[$i];
209 continue;
212 // Grab one field
213 $fallbacki = $i;
214 if ($ch == $csv_enclosed) {
215 if ($i == $len - 1) {
216 break;
218 $need_end = true;
219 $i++;
220 $ch = $buffer[$i];
221 } else {
222 $need_end = false;
224 $fail = false;
225 $value = '';
226 while (($need_end && ($ch != $csv_enclosed || $csv_enclosed == $csv_escaped))
227 || (!$need_end && !($ch == $csv_terminated
228 || $ch == $csv_new_line || ($csv_new_line == 'auto'
229 && ($ch == "\r" || $ch == "\n"))))) {
230 if ($ch == $csv_escaped) {
231 if ($i == $len - 1) {
232 $fail = true;
233 break;
235 $i++;
236 $ch = $buffer[$i];
237 if ($csv_enclosed == $csv_escaped && ($ch == $csv_terminated
238 || $ch == $csv_new_line || ($csv_new_line == 'auto'
239 && ($ch == "\r" || $ch == "\n")))) {
240 break;
243 $value .= $ch;
244 if ($i == $len - 1) {
245 if (!$finished) {
246 $fail = true;
248 break;
250 $i++;
251 $ch = $buffer[$i];
254 // unquoted NULL string
255 if (false === $need_end && $value === 'NULL') {
256 $value = null;
259 if ($fail) {
260 $i = $fallbacki;
261 $ch = $buffer[$i];
262 break;
264 // Need to strip trailing enclosing char?
265 if ($need_end && $ch == $csv_enclosed) {
266 if ($finished && $i == $len - 1) {
267 $ch = null;
268 } elseif ($i == $len - 1) {
269 $i = $fallbacki;
270 $ch = $buffer[$i];
271 break;
272 } else {
273 $i++;
274 $ch = $buffer[$i];
277 // Are we at the end?
278 if ($ch == $csv_new_line
279 || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
280 || ($finished && $i == $len - 1)
282 $csv_finish = true;
284 // Go to next char
285 if ($ch == $csv_terminated) {
286 if ($i == $len - 1) {
287 $i = $fallbacki;
288 $ch = $buffer[$i];
289 break;
291 $i++;
292 $ch = $buffer[$i];
294 // If everything went okay, store value
295 $values[] = $value;
298 // End of line
299 if ($csv_finish
300 || $ch == $csv_new_line
301 || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
303 if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
304 if ($i >= ($len - 2) && !$finished) {
305 break; // We need more data to decide new line
307 if ($buffer[$i + 1] == "\n") {
308 $i++;
311 // We didn't parse value till the end of line, so there was empty one
312 if (!$csv_finish) {
313 $values[] = '';
316 if ($analyze) {
317 foreach ($values as $ley => $val) {
318 $tempRow[] = $val;
319 ++$col_count;
322 if ($col_count > $max_cols) {
323 $max_cols = $col_count;
325 $col_count = 0;
327 $rows[] = $tempRow;
328 $tempRow = array();
329 } else {
330 // Do we have correct count of values?
331 if (count($values) != $required_fields) {
333 // Hack for excel
334 if ($values[count($values) - 1] == ';') {
335 unset($values[count($values) - 1]);
336 } else {
337 $message = PMA_Message::error(__('Invalid column count in CSV input on line %d.'));
338 $message->addParam($line);
339 $error = true;
340 break;
344 $first = true;
345 $sql = $sql_template;
346 foreach ($values as $key => $val) {
347 if (!$first) {
348 $sql .= ', ';
350 if ($val === null) {
351 $sql .= 'NULL';
352 } else {
353 $sql .= '\'' . PMA_sqlAddSlashes($val) . '\'';
356 $first = false;
358 $sql .= ')';
361 * @todo maybe we could add original line to verbose SQL in comment
363 PMA_importRunQuery($sql, $sql);
366 $line++;
367 $csv_finish = false;
368 $values = array();
369 $buffer = substr($buffer, $i + 1);
370 $len = strlen($buffer);
371 $i = 0;
372 $lasti = -1;
373 $ch = $buffer[0];
375 } // End of parser loop
376 } // End of import loop
378 if ($analyze) {
379 /* Fill out all rows */
380 $num_rows = count($rows);
381 for ($i = 0; $i < $num_rows; ++$i) {
382 for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
383 $rows[$i][] = 'NULL';
387 if (isset($_REQUEST['csv_col_names'])) {
388 $col_names = array_splice($rows, 0, 1);
389 $col_names = $col_names[0];
392 if ((isset($col_names) && count($col_names) != $max_cols)
393 || ! isset($col_names)
395 // Fill out column names
396 for ($i = 0; $i < $max_cols; ++$i) {
397 $col_names[] = 'COL '.($i+1);
401 if (strlen($db)) {
402 $result = PMA_DBI_fetch_result('SHOW TABLES');
403 $tbl_name = 'TABLE '.(count($result) + 1);
404 } else {
405 $tbl_name = 'TBL_NAME';
408 $tables[] = array($tbl_name, $col_names, $rows);
410 /* Obtain the best-fit MySQL types for each column */
411 $analyses = array();
412 $analyses[] = PMA_analyzeTable($tables[0]);
415 * string $db_name (no backquotes)
417 * array $table = array(table_name, array() column_names, array()() rows)
418 * array $tables = array of "$table"s
420 * array $analysis = array(array() column_types, array() column_sizes)
421 * array $analyses = array of "$analysis"s
423 * array $create = array of SQL strings
425 * array $options = an associative array of options
428 /* Set database name to the currently selected one, if applicable */
429 if (strlen($db)) {
430 $db_name = $db;
431 $options = array('create_db' => false);
432 } else {
433 $db_name = 'CSV_DB';
434 $options = null;
437 /* Non-applicable parameters */
438 $create = null;
440 /* Created and execute necessary SQL statements from data */
441 PMA_buildSQL($db_name, $tables, $analyses, $create, $options);
443 unset($tables);
444 unset($analyses);
447 // Commit any possible data in buffers
448 PMA_importRunQuery();
450 if (count($values) != 0 && !$error) {
451 $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
452 $message->addParam($line);
453 $error = true;