3 namespace PhpOffice\PhpSpreadsheet\Reader
;
5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate
;
6 use PhpOffice\PhpSpreadsheet\Shared\StringHelper
;
7 use PhpOffice\PhpSpreadsheet\Spreadsheet
;
9 class Csv
extends BaseReader
/**
 * Character encoding the file is expected to be in.
 *
 * @var string
 */
private $inputEncoding = 'UTF-8';

/**
 * Field delimiter; stays null until it is set, announced by a "sep=" line, or inferred.
 *
 * NOTE(review): declared here because the methods of this class read and write
 * $this->delimiter and test it against null — confirm BaseReader does not already declare it.
 *
 * @var null|string
 */
private $delimiter;

/**
 * Enclosure character passed to fgetcsv().
 *
 * @var string
 */
private $enclosure = '"';

/**
 * Sheet index to read.
 *
 * @var int
 */
private $sheetIndex = 0;

/**
 * Load rows contiguously.
 *
 * @var bool
 */
private $contiguous = false;

/**
 * Row counter for loading rows contiguously (-1 means "not started yet").
 *
 * @var int
 */
private $contiguousRow = -1;

/**
 * The character that can escape the enclosure.
 *
 * @var string
 */
private $escapeCharacter = '\\';
/**
 * Create a new CSV Reader instance with a DefaultReadFilter installed.
 */
public function __construct()
{
    $this->readFilter = new DefaultReadFilter();
}
/**
 * Set input encoding.
 *
 * @param string $pValue Input encoding, eg: 'UTF-8'
 *
 * @return $this
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Get input encoding.
 *
 * @return string
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
/**
 * Move filepointer past any BOM marker.
 *
 * Only the signature belonging to the configured input encoding is looked for.
 * The handle ends up directly after the BOM when it is present and at offset 0
 * otherwise (including for encodings that have no entry in the table).
 */
protected function skipBOM()
{
    // NOTE(review): the encoding labels are derived from the standard Unicode signatures
    // for these byte sequences — confirm the spellings match what setInputEncoding() receives.
    $signatures = [
        'UTF-8' => "\xEF\xBB\xBF",
        'UTF-16LE' => "\xFF\xFE",
        'UTF-16BE' => "\xFE\xFF",
        'UTF-32LE' => "\xFF\xFE\x00\x00",
        'UTF-32BE' => "\x00\x00\xFE\xFF",
    ];

    rewind($this->fileHandle);

    if (!isset($signatures[$this->inputEncoding])) {
        return;
    }

    $bom = $signatures[$this->inputEncoding];
    $length = strlen($bom);

    // fread() takes the exact byte count and never stops at a newline byte, unlike
    // fgets(), whose length argument is one more than the number of bytes it reads.
    $hasBom = fread($this->fileHandle, $length) === $bom;

    fseek($this->fileHandle, $hasBom ? $length : 0);
}
/**
 * Identify any separator that is explicitly set in the file.
 *
 * A first line consisting of "sep=" (case-insensitive) plus exactly one character sets
 * that character as the delimiter. Any other first line leaves the delimiter untouched
 * and repositions the file pointer through skipBOM().
 */
protected function checkSeparator()
{
    $line = fgets($this->fileHandle);
    if ($line === false) {
        // Nothing could be read from the handle.
        return;
    }

    $withoutLineEnding = trim($line, "\r\n");
    if (strlen($withoutLineEnding) === 5 && stripos($line, 'sep=') === 0) {
        $this->delimiter = substr($line, 4, 1);

        // NOTE(review): early return assumed, so the pointer stays after the directive
        // line and it is not parsed as a data row — confirm.
        return;
    }

    // Not a directive: put the pointer back so this line is read as data.
    $this->skipBOM();
}
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Up to 999 lines are sampled from the current file position. For each candidate the
 * per-line occurrence count (outside enclosures) is collected, and the candidate whose
 * counts deviate least from their median wins; ties go to the earlier candidate.
 * If no candidate qualifies, the first candidate (comma) is used. The file pointer is
 * repositioned through skipBOM() afterwards.
 *
 * NOTE(review): the sort, the zero-median skip, the division by the sample size and the
 * INF starting minimum are implied by the surrounding comments ("median", "mean square
 * deviation", "ignoring delimiters that haven't been found consistently") — confirm.
 */
protected function inferSeparator()
{
    if ($this->delimiter !== null) {
        return;
    }

    $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
    $counts = [];
    foreach ($potentialDelimiters as $delimiter) {
        $counts[$delimiter] = [];
    }

    // The enclosure cannot change while sampling, so build the pattern once.
    $enclosure = preg_quote($this->enclosure, '/');
    $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/U';

    // Count how many times each of the potential delimiters appears in each line
    $numberLines = 0;
    while ($numberLines < 999 && ($line = fgets($this->fileHandle)) !== false) {
        ++$numberLines;

        // Drop everything that is enclosed to avoid counting false positives in enclosures
        $stripped = preg_replace($enclosedPattern, '', $line);
        if ($stripped !== null) {
            // preg_replace() yields null on a PCRE error; keep the raw line in that case.
            $line = $stripped;
        }

        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter][] = substr_count($line, $delimiter);
        }
    }

    // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
    $meanSquareDeviations = [];
    if ($numberLines > 0) {
        // Guarded: with no sampled lines the median lookup would index an empty series.
        $middleIdx = (int) floor(($numberLines - 1) / 2);

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = ($numberLines % 2)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // Loose comparison on purpose: the median may be int 0 or float 0.0.
            if ($median == 0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                function ($sum, $value) use ($median) {
                    return $sum + pow($value - $median, 2);
                },
                0
            ) / count($series);
        }
    }

    // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
    $min = INF;
    foreach ($potentialDelimiters as $delimiter) {
        if (!isset($meanSquareDeviations[$delimiter])) {
            continue;
        }

        if ($meanSquareDeviations[$delimiter] < $min) {
            $min = $meanSquareDeviations[$delimiter];
            $this->delimiter = $delimiter;
        }
    }

    // If no delimiter could be detected, fall back to the default
    if ($this->delimiter === null) {
        $this->delimiter = reset($potentialDelimiters);
    }

    $this->skipBOM();
}
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 *
 * The whole file is read once with fgetcsv() to count rows and find the widest row.
 *
 * @param string $pFilename
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return array One entry (index 0) with the keys worksheetName, lastColumnLetter,
 *               lastColumnIndex, totalRows and totalColumns
 */
public function listWorksheetInfo($pFilename)
{
    // Open file
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }
    $this->openFile($pFilename);
    $fileHandle = $this->fileHandle;

    $totalRows = 0;
    $lastColumnIndex = 0;

    try {
        // NOTE(review): leading skipBOM() assumed, so that a "sep=" directive placed after
        // a BOM is still recognised by checkSeparator() — confirm.
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Loop through each line of the file in turn
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
            ++$totalRows;
            $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
        }
    } finally {
        // Release the handle even when reading fails part-way through.
        fclose($fileHandle);
    }

    return [
        [
            'worksheetName' => 'Worksheet',
            'lastColumnLetter' => Coordinate::stringFromColumnIndex($lastColumnIndex + 1),
            'lastColumnIndex' => $lastColumnIndex,
            'totalRows' => $totalRows,
            'totalColumns' => $lastColumnIndex + 1,
        ],
    ];
}
/**
 * Loads Spreadsheet from file.
 *
 * A new Spreadsheet is created and handed to loadIntoExisting(), whose result is returned.
 *
 * @param string $pFilename
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    $freshSpreadsheet = new Spreadsheet();

    return $this->loadIntoExisting($pFilename, $freshSpreadsheet);
}
/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 *
 * Rows are written to the sheet at the configured sheet index (sheets are created until
 * that index exists). Empty fields and cells rejected by the read filter are skipped;
 * values are converted to UTF-8 when the input encoding differs.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return Spreadsheet
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    $lineEnding = ini_get('auto_detect_line_endings');
    ini_set('auto_detect_line_endings', true);

    try {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        try {
            // NOTE(review): leading skipBOM() assumed, so that a "sep=" directive placed
            // after a BOM is still recognised by checkSeparator() — confirm.
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Create new PhpSpreadsheet object
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // Set our starting row based on whether we're in contiguous mode or not
            // NOTE(review): starting row 1 and starting column 'A' are assumed — confirm.
            $currentRow = 1;
            if ($this->contiguous) {
                $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
            }

            // Loop through each line of the file in turn
            while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    // Loose comparison kept: it also skips the null fgetcsv() yields for a blank line.
                    if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        // Convert encoding if necessary
                        if ($this->inputEncoding !== 'UTF-8') {
                            $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                        }

                        $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                    }
                    // String increment: 'A' -> 'B', 'Z' -> 'AA'.
                    ++$columnLetter;
                }
                ++$currentRow;
            }
        } finally {
            // Release the handle even when a cell write or conversion throws.
            fclose($fileHandle);
        }

        // Remember where the next contiguous load has to continue.
        if ($this->contiguous) {
            $this->contiguousRow = $currentRow;
        }
    } finally {
        // Always put the ini setting back, including on the "invalid file" path.
        ini_set('auto_detect_line_endings', $lineEnding);
    }

    return $spreadsheet;
}
/**
 * Get delimiter.
 *
 * @return null|string Null while no delimiter has been set, announced or inferred
 */
public function getDelimiter()
{
    return $this->delimiter;
}
/**
 * Set delimiter.
 *
 * A non-null value makes inferSeparator() return without sampling the file.
 *
 * @param string $delimiter Delimiter, eg: ','
 *
 * @return $this
 */
public function setDelimiter($delimiter)
{
    $this->delimiter = $delimiter;

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Get enclosure.
 *
 * @return string
 */
public function getEnclosure()
{
    return $this->enclosure;
}
/**
 * Set enclosure.
 *
 * @param string $enclosure Enclosure, defaults to "
 *
 * @return $this
 */
public function setEnclosure($enclosure)
{
    if ($enclosure == '') {
        // Fall back to the documented default rather than storing an empty enclosure.
        $enclosure = '"';
    }
    $this->enclosure = $enclosure;

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Get sheet index.
 *
 * @return int
 */
public function getSheetIndex()
{
    return $this->sheetIndex;
}
/**
 * Set sheet index.
 *
 * @param int $pValue Sheet index
 *
 * @return $this
 */
public function setSheetIndex($pValue)
{
    $this->sheetIndex = $pValue;

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Set Contiguous.
 *
 * @param bool $contiguous
 *
 * @return $this
 */
public function setContiguous($contiguous)
{
    $this->contiguous = (bool) $contiguous;

    // NOTE(review): the reset is assumed to apply only when contiguous mode is switched
    // off, so that re-enabling it does not discard the running row counter — confirm.
    if (!$this->contiguous) {
        $this->contiguousRow = -1;
    }

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Get Contiguous.
 *
 * @return bool
 */
public function getContiguous()
{
    return $this->contiguous;
}
/**
 * Set the character that can escape the enclosure.
 *
 * @param string $escapeCharacter
 *
 * @return $this
 */
public function setEscapeCharacter($escapeCharacter)
{
    $this->escapeCharacter = $escapeCharacter;

    // NOTE(review): fluent return assumed; harmless for callers that ignore the result.
    return $this;
}
/**
 * Get the character that can escape the enclosure.
 *
 * @return string
 */
public function getEscapeCharacter()
{
    return $this->escapeCharacter;
}
/**
 * Can the current IReader read the file?
 *
 * The file must be openable through openFile() and its detected MIME type must be one
 * of the supported types.
 *
 * @param string $pFilename
 *
 * @return bool
 */
public function canRead($pFilename)
{
    // Check if file exists
    try {
        $this->openFile($pFilename);
    } catch (Exception $e) {
        return false;
    }

    // The handle was only needed for the existence check.
    fclose($this->fileHandle);

    $type = mime_content_type($pFilename);

    // NOTE(review): list taken from the upstream PhpSpreadsheet reader, not from the
    // reviewed lines — confirm before relying on it.
    $supportedTypes = [
        'text/csv',
        'text/plain',
        'inode/x-empty',
    ];

    return in_array($type, $supportedTypes, true);
}