7 * Class: parseCSV v0.4.3 beta
8 * http://code.google.com/p/parsecsv-for-php/
11 * Fully conforms to the specifications lined out on wikipedia:
12 * - http://en.wikipedia.org/wiki/Comma-separated_values
14 * Based on the concept of Ming Hong Ng's CsvFileParser class:
15 * - http://minghong.blogspot.com/2006/07/csv-parser-for-php.html
19 * Copyright (c) 2007 Jim Myhrberg (jim@zydev.info).
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 * $csv = new parseCSV('data.csv');
45 * print_r($csv->data);
47 * # tab delimited, and encoding conversion
48 * $csv = new parseCSV();
49 * $csv->encoding('UTF-16', 'UTF-8');
50 * $csv->delimiter = "\t";
51 * $csv->parse('data.tsv');
52 * print_r($csv->data);
54 * # auto-detect delimiter character
55 * $csv = new parseCSV();
56 * $csv->auto('data.csv');
57 * print_r($csv->data);
59 * # modify data in a csv file
60 * $csv = new parseCSV();
61 * $csv->sort_by = 'id';
62 * $csv->parse('data.csv');
63 * # "4" is the value of the "id" column of the CSV row
64 * $csv->data[4] = array('firstname' => 'John', 'lastname' => 'Doe', 'email' => 'john@doe.com');
67 * # add row/entry to end of CSV file
68 * # - only recommended when you know the exact structure of the file
69 * $csv = new parseCSV();
70 * $csv->save('data.csv', array('1986', 'Home', 'Nowhere', ''), true);
72 * # convert 2D array to csv data and send headers
73 * # to browser to treat output as a file and download it
74 * $csv = new parseCSV();
75 * $csv->output (true, 'movies.csv', $array);
83 * - set these options with $object->var_name = 'value';
86 // use first line/entry as field names
89 // override field names
90 var $fields = array ();
92 // sort entries by this field
94 var $sort_reverse = false;
96 // sort behavior passed to ksort/krsort functions
97 // regular = SORT_REGULAR
98 // numeric = SORT_NUMERIC
99 // string = SORT_STRING
100 var $sort_type = null;
102 // delimiter (comma) and enclosure (double quote)
103 var $delimiter = ',';
104 var $enclosure = '"';
106 // basic SQL-like conditions for row matching
107 var $conditions = null;
109 // number of rows to ignore from beginning of data
112 // limits the number of returned rows to specified amount
115 // number of rows to analyze when attempting to auto-detect delimiter
116 var $auto_depth = 15;
118 // characters to ignore when attempting to auto-detect delimiter
119 var $auto_non_chars = "a-zA-Z0-9\n\r";
121 // preferred delimiter characters, only used when all filtering methods
122 // return multiple possible delimiters (happens very rarely)
123 var $auto_preferred = ",;\t.:|";
125 // character encoding options
126 var $convert_encoding = false;
127 var $input_encoding = 'ISO-8859-1';
128 var $output_encoding = 'ISO-8859-1';
130 // used by unparse(), save(), and output() functions
131 var $linefeed = "\r\n";
133 // only used by output() function
134 var $output_delimiter = ',';
135 var $output_filename = 'data.csv';
137 // keep raw file data in memory after successful parsing (useful for debugging)
138 var $keep_file_data = false;
147 // loaded file contents
150 // error while parsing input data
151 // 0 = No errors found. Everything should be fine :)
152 // 1 = Hopefully correctable syntax error was found.
153 // 2 = Enclosure character (double quote by default)
154 // was found in non-enclosed field. This means
155 // the file is either corrupt, or does not follow
156 // standard CSV formatting. Please validate
157 // the parsed data yourself.
160 // detailed error info
161 var $error_info = array ();
163 // array of field values in data parsed
164 var $titles = array ();
166 // two-dimensional array of CSV data
167 var $data = array ();
173 * input CSV file or string
/**
 * Create a parser and, when input is supplied, parse it immediately.
 *
 * @param mixed $input      CSV file or CSV string; parsing is skipped when empty
 * @param mixed $offset     number of rows to ignore from the beginning; left untouched when null
 * @param mixed $limit      maximum number of rows to return; left untouched when null
 * @param mixed $conditions row-matching conditions; left untouched when null or an empty array
 */
function __construct($input = null, $offset = null, $limit = null, $conditions = null)
{
    if ($offset !== null) {
        $this->offset = $offset;
    }

    if ($limit !== null) {
        $this->limit = $limit;
    }

    // count() may only be given arrays/Countable objects: a null or string
    // argument raises a TypeError on PHP 8 (and a warning on 7.2+). Any other
    // non-null value keeps the legacy count() result of 1, i.e. it is treated
    // as "conditions supplied".
    if (is_array($conditions) || $conditions instanceof Countable) {
        $has_conditions = count($conditions) > 0;
    } else {
        $has_conditions = ($conditions !== null);
    }
    if ($has_conditions) {
        $this->conditions = $conditions;
    }

    if (! empty($input)) {
        $this->parse($input);
    }
}
195 // ==============================================
196 // ----- [ Main Functions ] ---------------------
197 // ==============================================
200 * Parse CSV file or string
203 * input CSV file or string
/**
 * Parse a CSV file or CSV string and store the rows in $this->data.
 *
 * Readable paths are handed to parse_file(); any other non-empty input is
 * treated as raw CSV text and handed to parse_string() via $this->file_data.
 *
 * @param mixed $input      CSV file or CSV string; falls back to $this->file when null
 * @param mixed $offset     number of rows to ignore from the beginning; left untouched when null
 * @param mixed $limit      maximum number of rows to return; left untouched when null
 * @param mixed $conditions row-matching conditions; left untouched when null or an empty array
 * @return bool false when parsing produced false, true otherwise
 *              NOTE(review): both return statements are reconstructed from the
 *              visible `$this->data === false` check - confirm against the full file.
 */
function parse($input = null, $offset = null, $limit = null, $conditions = null)
{
    if ($input === null) {
        $input = $this->file;
    }

    if (! empty($input)) {
        if ($offset !== null) {
            $this->offset = $offset;
        }

        if ($limit !== null) {
            $this->limit = $limit;
        }

        // count() may only be given arrays/Countable objects: a null or string
        // argument raises a TypeError on PHP 8 (and a warning on 7.2+). Any
        // other non-null value keeps the legacy count() result of 1.
        if (is_array($conditions) || $conditions instanceof Countable) {
            $has_conditions = count($conditions) > 0;
        } else {
            $has_conditions = ($conditions !== null);
        }
        if ($has_conditions) {
            $this->conditions = $conditions;
        }

        if (is_readable($input)) {
            $this->data = $this->parse_file($input);
        } else {
            // not a readable path: treat the input itself as CSV text
            $this->file_data = &$input;
            $this->data = $this->parse_string();
        }

        if ($this->data === false) {
            return false;
        }
    }

    return true;
}
241 * Save changes, or new file and/or data
244 * file file to save to
246 * data 2D array with data
248 * append append current data to end of target CSV if exists
251 * @return true or false
253 function save($file = null, $data = array(), $append = false, $fields = array())
256 $file = &$this->file
;
259 $mode = ($append) ?
'at' : 'wt';
260 $is_php = (preg_match('/\.php$/i', $file)) ?
true : false;
261 return $this->_wfile($file, $this->unparse($data, $fields, $append, $is_php), $mode);
265 * Generate CSV based string for output
268 * filename if specified, headers and data will be output directly to browser as a downloadable file
270 * data 2D array with data
274 * delimiter delimiter used to separate data
275 * @return CSV data using delimiter of choice, or default
277 function output($filename = null, $data = array(), $fields = array(), $delimiter = null)
279 if (empty($filename)) {
280 $filename = $this->output_filename
;
283 if ($delimiter === null) {
284 $delimiter = $this->output_delimiter
;
287 $data = $this->unparse($data, $fields, null, null, $delimiter);
288 if ($filename !== null) {
289 header('Content-type: application/csv');
290 header('Content-Disposition: attachment; filename="' . $filename . '"');
298 * Convert character encoding
301 * input input character encoding, uses default if left blank
303 * output output character encoding, uses default if left blank
/**
 * Switch on character-encoding conversion and optionally override the
 * encodings used for it.
 *
 * @param mixed $input  encoding to convert from; current setting is kept when null
 * @param mixed $output encoding to convert to; current setting is kept when null
 */
function encoding($input = null, $output = null)
{
    $this->convert_encoding = true;

    $overrides = array(
        'input_encoding'  => $input,
        'output_encoding' => $output,
    );

    foreach ($overrides as $property => $charset) {
        if ($charset === null) {
            // nothing supplied for this side: keep the existing value
            continue;
        }
        $this->$property = $charset;
    }
}
319 * Auto-Detect Delimiter: Find delimiter by analyzing a specific number of
320 * rows to determine most probable delimiter character
323 * file local CSV file
325 * parse true/false parse file directly
327 * search_depth number of rows to analyze
329 * preferred preferred delimiter characters
331 * enclosure enclosure character, default is double quote (").
332 * @return delimiter character
334 function auto($file = null, $parse = true, $search_depth = null, $preferred = null, $enclosure = null)
336 if ($file === null) {
340 if (empty($search_depth)) {
341 $search_depth = $this->auto_depth
;
344 if ($enclosure === null) {
345 $enclosure = $this->enclosure
;
348 if ($preferred === null) {
349 $preferred = $this->auto_preferred
;
352 if (empty($this->file_data
)) {
353 if ($this->_check_data($file)) {
354 $data = &$this->file_data
;
359 $data = &$this->file_data
;
363 $strlen = strlen($data);
368 // walk specific depth finding possible delimiter characters
369 for ($i = 0; $i < $strlen; $i++
) {
371 $nch = (isset($data [$i +
1])) ?
$data [$i +
1] : false;
372 $pch = (isset($data [$i - 1])) ?
$data [$i - 1] : false;
374 // open and closing quotes
375 if ($ch == $enclosure) {
376 if (! $enclosed ||
$nch != $enclosure) {
377 $enclosed = ($enclosed) ?
false : true;
378 } elseif ($enclosed) {
383 } elseif (($ch == "\n" && $pch != "\r" ||
$ch == "\r") && ! $enclosed) {
384 if ($n >= $search_depth) {
392 } elseif (! $enclosed) {
393 if (! preg_match('/[' . preg_quote($this->auto_non_chars
, '/') . ']/i', $ch)) {
394 if (! isset($chars [$ch] [$n])) {
395 $chars [$ch] [$n] = 1;
397 $chars [$ch] [$n] ++
;
404 $depth = ($to_end) ?
$n - 1 : $n;
405 $filtered = array ();
406 foreach ($chars as $char => $value) {
407 if ($match = $this->_check_count($char, $value, $depth, $preferred)) {
408 $filtered [$match] = $char;
412 // capture most probable delimiter
414 $this->delimiter
= reset($filtered);
418 $this->data
= $this->parse_string();
421 return $this->delimiter
;
424 // ==============================================
425 // ----- [ Core Functions ] ---------------------
426 // ==============================================
429 * Read file to string and call parse_string()
432 * file local CSV file
433 * @return 2D array with CSV data, or false on failure
435 function parse_file($file = null)
437 if ($file === null) {
441 if (empty($this->file_data
)) {
442 $this->load_data($file);
445 return (! empty($this->file_data
)) ?
$this->parse_string() : false;
449 * Parse CSV strings to arrays
453 * @return 2D array with CSV data, or false on failure
455 function parse_string($data = null)
458 if ($this->_check_data()) {
459 $data = &$this->file_data
;
465 $white_spaces = str_replace($this->delimiter
, '', " \t\x0B\0");
471 $head = (! empty($this->fields
)) ?
$this->fields
: array ();
474 $was_enclosed = false;
475 $strlen = strlen($data);
477 // walk through each character
478 for ($i = 0; $i < $strlen; $i++
) {
480 $nch = (isset($data [$i +
1])) ?
$data [$i +
1] : false;
481 $pch = (isset($data [$i - 1])) ?
$data [$i - 1] : false;
483 // open/close quotes, and inline quotes
484 if ($ch == $this->enclosure
) {
486 if (ltrim($current, $white_spaces) == '') {
488 $was_enclosed = true;
491 $error_row = count($rows) +
1;
492 $error_col = $col +
1;
493 if (! isset($this->error_info
[$error_row . '-' . $error_col])) {
494 $this->error_info
[$error_row . '-' . $error_col] = array (
496 'info' => 'Syntax error found on row ' . $error_row . '. Non-enclosed fields can not contain double-quotes.',
498 'field' => $error_col,
499 'field_name' => (! empty($head [$col])) ?
$head [$col] : null
505 } elseif ($nch == $this->enclosure
) {
508 } elseif ($nch != $this->delimiter
&& $nch != "\r" && $nch != "\n") {
509 for ($x = ($i +
1); isset($data [$x]) && ltrim($data [$x], $white_spaces) == ''; $x++
) {
512 if ($data [$x] == $this->delimiter
) {
516 if ($this->error
< 1) {
520 $error_row = count($rows) +
1;
521 $error_col = $col +
1;
522 if (! isset($this->error_info
[$error_row . '-' . $error_col])) {
523 $this->error_info
[$error_row . '-' . $error_col] = array (
525 'info' => 'Syntax error found on row ' . (count($rows) +
1) . '. ' . 'A single double-quote was found within an enclosed string. ' . 'Enclosed double-quotes must be escaped with a second double-quote.',
526 'row' => count($rows) +
1,
528 'field_name' => (! empty($head [$col])) ?
$head [$col] : null
540 } elseif (($ch == $this->delimiter ||
$ch == "\n" ||
$ch == "\r") && ! $enclosed) {
541 $key = (! empty($head [$col])) ?
$head [$col] : $col;
542 $row [$key] = ($was_enclosed) ?
$current : trim($current);
544 $was_enclosed = false;
548 if ($ch == "\n" ||
$ch == "\r") {
549 if ($this->_validate_offset($row_count) && $this->_validate_row_conditions($row, $this->conditions
)) {
550 if ($this->heading
&& empty($head)) {
552 } elseif (empty($this->fields
) ||
(! empty($this->fields
) && (($this->heading
&& $row_count > 0) ||
! $this->heading
))) {
553 if (! empty($this->sort_by
) && ! empty($row [$this->sort_by
])) {
554 if (isset($rows [$row [$this->sort_by
]])) {
555 $rows [$row [$this->sort_by
] . '_0'] = &$rows [$row [$this->sort_by
]];
556 unset($rows [$row [$this->sort_by
]]);
557 for ($sn = 1; isset($rows [$row [$this->sort_by
] . '_' . $sn]); $sn++
) {
560 $rows [$row [$this->sort_by
] . '_' . $sn] = $row;
562 $rows [$row [$this->sort_by
]] = $row;
573 if ($this->sort_by
=== null && $this->limit
!== null && count($rows) == $this->limit
) {
577 if ($ch == "\r" && $nch == "\n") {
582 // append character to current field
588 $this->titles
= $head;
589 if (! empty($this->sort_by
)) {
590 $sort_type = SORT_REGULAR
;
591 if ($this->sort_type
== 'numeric') {
592 $sort_type = SORT_NUMERIC
;
593 } elseif ($this->sort_type
== 'string') {
594 $sort_type = SORT_STRING
;
597 ($this->sort_reverse
) ?
krsort($rows, $sort_type) : ksort($rows, $sort_type);
598 if ($this->offset
!== null ||
$this->limit
!== null) {
599 $rows = array_slice($rows, ($this->offset
=== null ?
0 : $this->offset
), $this->limit
, true);
603 if (! $this->keep_file_data
) {
604 $this->file_data
= null;
611 * Create CSV data from array
614 * data 2D array with data
618 * append if true, field names will not be output
620 * is_php if a php die() call should be put on the first
621 * line of the file, this is later ignored when read.
623 * delimiter field delimiter to use
624 * @return CSV data (text string)
626 function unparse($data = array(), $fields = array(), $append = false, $is_php = false, $delimiter = null)
628 if (! is_array($data) ||
empty($data)) {
629 $data = &$this->data
;
632 if (! is_array($fields) ||
empty($fields)) {
633 $fields = &$this->titles
;
636 if ($delimiter === null) {
637 $delimiter = $this->delimiter
;
640 $string = ($is_php) ?
"<?php header('Status: 403'); die(' '); ?>" . $this->linefeed
: '';
644 if ($this->heading
&& ! $append && ! empty($fields)) {
645 foreach ($fields as $key => $value) {
646 $entry [] = $this->_enclose_value($value);
649 $string .= implode($delimiter, $entry) . $this->linefeed
;
654 foreach ($data as $key => $row) {
655 foreach ($row as $field => $value) {
656 $entry [] = $this->_enclose_value($value);
659 $string .= implode($delimiter, $entry) . $this->linefeed
;
667 * Load local file or string
670 * input local CSV file
671 * @return true or false
673 function load_data($input = null)
677 if ($input === null) {
679 } elseif (file_exists($input)) {
685 if (! empty($data) ||
$data = $this->_rfile($file)) {
686 if ($this->file
!= $file) {
690 if (preg_match('/\.php$/i', $file) && preg_match('/<\?.*?\?>(.*)/ims', $data, $strip)) {
691 $data = ltrim($strip [1]);
694 if ($this->convert_encoding
) {
695 $data = iconv($this->input_encoding
, $this->output_encoding
, $data);
698 if (substr($data, - 1) != "\n") {
702 $this->file_data
= &$data;
709 // ==============================================
710 // ----- [ Internal Functions ] -----------------
711 // ==============================================
714 * Validate a row against specified conditions
717 * row array with values from a row
719 * conditions specified conditions that the row must match
720 * @return true or false
722 function _validate_row_conditions($row = array(), $conditions = null)
725 if (! empty($conditions)) {
726 $conditions = (strpos($conditions, ' OR ') !== false) ?
explode(' OR ', $conditions) : array (
730 foreach ($conditions as $key => $value) {
731 if (strpos($value, ' AND ') !== false) {
732 $value = explode(' AND ', $value);
734 foreach ($value as $k => $v) {
735 $and .= $this->_validate_row_condition($row, $v);
738 $or .= (strpos($and, '0') !== false) ?
'0' : '1';
740 $or .= $this->_validate_row_condition($row, $value);
744 return (strpos($or, '1') !== false) ?
true : false;
754 * Validate a row against a single condition
757 * row array with values from a row
759 * condition specified condition that the row must match
760 * @return true or false
762 function _validate_row_condition($row, $condition)
775 'is less than or equals',
777 'is greater than or equals',
781 $operators_regex = array ();
782 foreach ($operators as $value) {
783 $operators_regex [] = preg_quote($value, '/');
786 $operators_regex = implode('|', $operators_regex);
787 if (preg_match('/^(.+) (' . $operators_regex . ') (.+)$/i', trim($condition), $capture)) {
788 $field = $capture [1];
790 $value = $capture [3];
791 if (preg_match('/^([\'\"]{1})(.*)([\'\"]{1})$/i', $value, $capture)) {
792 if ($capture [1] == $capture [3]) {
793 $value = $capture [2];
794 $value = str_replace("\\n", "\n", $value);
795 $value = str_replace("\\r", "\r", $value);
796 $value = str_replace("\\t", "\t", $value);
797 $value = stripslashes($value);
801 if (array_key_exists($field, $row)) {
802 if (($op == '=' ||
$op == 'equals' ||
$op == 'is') && $row [$field] == $value) {
804 } elseif (($op == '!=' ||
$op == 'is not') && $row [$field] != $value) {
806 } elseif (($op == '<' ||
$op == 'is less than') && $row [$field] < $value) {
808 } elseif (($op == '>' ||
$op == 'is greater than') && $row [$field] > $value) {
810 } elseif (($op == '<=' ||
$op == 'is less than or equals') && $row [$field] <= $value) {
812 } elseif (($op == '>=' ||
$op == 'is greater than or equals') && $row [$field] >= $value) {
814 } elseif ($op == 'contains' && preg_match('/' . preg_quote($value, '/') . '/i', $row [$field])) {
816 } elseif ($op == 'does not contain' && ! preg_match('/' . preg_quote($value, '/') . '/i', $row [$field])) {
828 * Validates if the row is within the offset or not if sorting is disabled
831 * current_row the current row number being processed
832 * @return true or false
/**
 * Tell whether a row is at or past the configured offset.
 * The offset is only enforced here while no sort_by field is set.
 *
 * NOTE(review): the return statements are not visible in this view; they are
 * reconstructed from the "@return true or false" doc and from the caller,
 * which processes a row only when this method is truthy - confirm.
 *
 * @param mixed $current_row number of the row currently being processed
 * @return bool false when the row lies before the offset, true otherwise
 */
function _validate_offset($current_row)
{
    $offset_applies = ($this->sort_by === null) && ($this->offset !== null);

    return ! ($offset_applies && $current_row < $this->offset);
}
844 * Enclose values if needed
845 * - only used by unparse()
848 * value string to process
849 * @return Processed value
/**
 * Enclose a field value if needed
 * - only used by unparse()
 *
 * A value is wrapped in the enclosure character (with embedded enclosure
 * characters doubled) when it contains the delimiter, the enclosure, a line
 * break, or starts/ends with a space. Anything else is returned untouched.
 *
 * @param mixed $value value to process
 * @return mixed the enclosed string, or the original value when no enclosing is needed
 */
function _enclose_value($value = null)
{
    if ($value === null) {
        return $value;
    }

    // Inspect the string form: reading $value[0] on an int/float/bool emits a
    // warning and yields null on PHP 7.4+, and the former loose `!= ''` test
    // gives different answers for 0 on PHP 7 and PHP 8.
    $text = (string) $value;
    if ($text === '') {
        return $value;
    }

    $delimiter = preg_quote($this->delimiter, '/');
    $enclosure = preg_quote($this->enclosure, '/');

    $has_special_chars = preg_match("/" . $delimiter . "|" . $enclosure . "|\n|\r/i", $text);
    $has_edge_space = ($text[0] == ' ' || substr($text, - 1) == ' ');

    if ($has_special_chars || $has_edge_space) {
        // escape embedded enclosure characters by doubling them
        $text = str_replace($this->enclosure, $this->enclosure . $this->enclosure, $text);
        return $this->enclosure . $text . $this->enclosure;
    }

    return $value;
}
869 * file local filename
870 * @return true or false
872 function _check_data($file = null)
874 if (empty($this->file_data
)) {
875 if ($file === null) {
879 return $this->load_data($file);
886 * Check if passed info might be delimiter
887 * - only used by auto()
889 * @return special string used for delimiter selection, or false
891 function _check_count($char, $array, $depth, $preferred)
893 if ($depth == count($array)) {
897 foreach ($array as $key => $value) {
898 if ($first == null) {
900 } elseif ($value == $first && $equal !== false) {
902 } elseif ($value == $first +
1 && $equal !== false) {
911 $match = ($almost) ?
2 : 1;
912 $pref = strpos($preferred, $char);
913 $pref = ($pref !== false) ?
str_pad($pref, 3, '0', STR_PAD_LEFT
) : '999';
914 return $pref . $match . '.' . (99999 - str_pad($first, 5, '0', STR_PAD_LEFT
));
925 * file local filename
926 * @return Data from file, or false on failure
928 function _rfile($file = null)
930 if (is_readable($file)) {
931 if (! ($fh = fopen($file, 'r'))) {
935 $data = fread($fh, filesize($file));
944 * Write to local file
947 * file local filename
949 * string data to write to file
954 * @return true or false
956 function _wfile($file, $string = '', $mode = 'wb', $lock = 2)
958 if ($fp = fopen($file, $mode)) {
960 $re = fwrite($fp, $string);
962 if ($re != false && $re2 != false) {