3 namespace PhpOffice\PhpSpreadsheet\Reader
;
5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate
;
6 use PhpOffice\PhpSpreadsheet\Shared\StringHelper
;
7 use PhpOffice\PhpSpreadsheet\Spreadsheet
;
9 class Csv
extends BaseReader
16 private $inputEncoding = 'UTF-8';
30 private $enclosure = '"';
33 * Sheet index to read.
37 private $sheetIndex = 0;
40 * Load rows contiguously.
44 private $contiguous = false;
47 * Row counter for loading rows contiguously.
51 private $contiguousRow = -1;
54 * Create a new CSV Reader instance.
56 public function __construct()
58 $this->readFilter
= new DefaultReadFilter();
64 * @param string $pValue Input encoding, eg: 'UTF-8'
68 public function setInputEncoding($pValue)
70 $this->inputEncoding
= $pValue;
80 public function getInputEncoding()
82 return $this->inputEncoding
;
86 * Move filepointer past any BOM marker.
88 protected function skipBOM()
90 rewind($this->fileHandle
);
92 switch ($this->inputEncoding
) {
94 fgets($this->fileHandle
, 4) == "\xEF\xBB\xBF" ?
95 fseek($this->fileHandle
, 3) : fseek($this->fileHandle
, 0);
99 fgets($this->fileHandle
, 3) == "\xFF\xFE" ?
100 fseek($this->fileHandle
, 2) : fseek($this->fileHandle
, 0);
104 fgets($this->fileHandle
, 3) == "\xFE\xFF" ?
105 fseek($this->fileHandle
, 2) : fseek($this->fileHandle
, 0);
109 fgets($this->fileHandle
, 5) == "\xFF\xFE\x00\x00" ?
110 fseek($this->fileHandle
, 4) : fseek($this->fileHandle
, 0);
114 fgets($this->fileHandle
, 5) == "\x00\x00\xFE\xFF" ?
115 fseek($this->fileHandle
, 4) : fseek($this->fileHandle
, 0);
124 * Identify any separator that is explicitly set in the file.
126 protected function checkSeparator()
128 $line = fgets($this->fileHandle
);
129 if ($line === false) {
133 if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
134 $this->delimiter
= substr($line, 4, 1);
139 return $this->skipBOM();
143 * Infer the separator if it isn't explicitly set in the file or specified by the user.
145 protected function inferSeparator()
147 if ($this->delimiter
!== null) {
151 $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
153 foreach ($potentialDelimiters as $delimiter) {
154 $counts[$delimiter] = [];
157 // Count how many times each of the potential delimiters appears in each line
159 while (($line = fgets($this->fileHandle
)) !== false && (++
$numberLines < 1000)) {
160 // Drop everything that is enclosed to avoid counting false positives in enclosures
161 $enclosure = preg_quote($this->enclosure
, '/');
162 $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
165 for ($i = strlen($line) - 1; $i >= 0; --$i) {
167 if (isset($counts[$char])) {
168 if (!isset($countLine[$char])) {
169 $countLine[$char] = 0;
174 foreach ($potentialDelimiters as $delimiter) {
175 $counts[$delimiter][] = isset($countLine[$delimiter])
176 ?
$countLine[$delimiter]
181 // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
182 $meanSquareDeviations = [];
183 $middleIdx = floor(($numberLines - 1) / 2);
185 foreach ($potentialDelimiters as $delimiter) {
186 $series = $counts[$delimiter];
189 $median = ($numberLines %
2)
190 ?
$series[$middleIdx]
191 : ($series[$middleIdx] +
$series[$middleIdx +
1]) / 2;
197 $meanSquareDeviations[$delimiter] = array_reduce(
199 function ($sum, $value) use ($median) {
200 return $sum +
pow($value - $median, 2);
205 // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
207 foreach ($potentialDelimiters as $delimiter) {
208 if (!isset($meanSquareDeviations[$delimiter])) {
212 if ($meanSquareDeviations[$delimiter] < $min) {
213 $min = $meanSquareDeviations[$delimiter];
214 $this->delimiter
= $delimiter;
218 // If no delimiter could be detected, fall back to the default
219 if ($this->delimiter
=== null) {
220 $this->delimiter
= reset($potentialDelimiters);
223 return $this->skipBOM();
227 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
229 * @param string $pFilename
235 public function listWorksheetInfo($pFilename)
238 if (!$this->canRead($pFilename)) {
239 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
241 $this->openFile($pFilename);
242 $fileHandle = $this->fileHandle
;
246 $this->checkSeparator();
247 $this->inferSeparator();
250 $worksheetInfo[0]['worksheetName'] = 'Worksheet';
251 $worksheetInfo[0]['lastColumnLetter'] = 'A';
252 $worksheetInfo[0]['lastColumnIndex'] = 0;
253 $worksheetInfo[0]['totalRows'] = 0;
254 $worksheetInfo[0]['totalColumns'] = 0;
256 // Loop through each line of the file in turn
257 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter
, $this->enclosure
)) !== false) {
258 ++
$worksheetInfo[0]['totalRows'];
259 $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
262 $worksheetInfo[0]['lastColumnLetter'] = Coordinate
::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] +
1);
263 $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] +
1;
268 return $worksheetInfo;
272 * Loads Spreadsheet from file.
274 * @param string $pFilename
278 * @return Spreadsheet
280 public function load($pFilename)
282 // Create new Spreadsheet
283 $spreadsheet = new Spreadsheet();
285 // Load into this instance
286 return $this->loadIntoExisting($pFilename, $spreadsheet);
290 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
292 * @param string $pFilename
293 * @param Spreadsheet $spreadsheet
297 * @return Spreadsheet
299 public function loadIntoExisting($pFilename, Spreadsheet
$spreadsheet)
301 $lineEnding = ini_get('auto_detect_line_endings');
302 ini_set('auto_detect_line_endings', true);
305 if (!$this->canRead($pFilename)) {
306 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
308 $this->openFile($pFilename);
309 $fileHandle = $this->fileHandle
;
313 $this->checkSeparator();
314 $this->inferSeparator();
316 // Create new PhpSpreadsheet object
317 while ($spreadsheet->getSheetCount() <= $this->sheetIndex
) {
318 $spreadsheet->createSheet();
320 $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex
);
322 // Set our starting row based on whether we're in contiguous mode or not
324 if ($this->contiguous
) {
325 $currentRow = ($this->contiguousRow
== -1) ?
$sheet->getHighestRow() : $this->contiguousRow
;
328 // Loop through each line of the file in turn
329 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter
, $this->enclosure
)) !== false) {
331 foreach ($rowData as $rowDatum) {
332 if ($rowDatum != '' && $this->readFilter
->readCell($columnLetter, $currentRow)) {
333 // Convert encoding if necessary
334 if ($this->inputEncoding
!== 'UTF-8') {
335 $rowDatum = StringHelper
::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding
);
339 $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
349 if ($this->contiguous
) {
350 $this->contiguousRow
= $currentRow;
353 ini_set('auto_detect_line_endings', $lineEnding);
364 public function getDelimiter()
366 return $this->delimiter
;
372 * @param string $delimiter Delimiter, eg: ','
376 public function setDelimiter($delimiter)
378 $this->delimiter
= $delimiter;
388 public function getEnclosure()
390 return $this->enclosure
;
396 * @param string $enclosure Enclosure, defaults to "
400 public function setEnclosure($enclosure)
402 if ($enclosure == '') {
405 $this->enclosure
= $enclosure;
415 public function getSheetIndex()
417 return $this->sheetIndex
;
423 * @param int $pValue Sheet index
427 public function setSheetIndex($pValue)
429 $this->sheetIndex
= $pValue;
437 * @param bool $contiguous
441 public function setContiguous($contiguous)
443 $this->contiguous
= (bool) $contiguous;
445 $this->contiguousRow
= -1;
456 public function getContiguous()
458 return $this->contiguous
;
462 * Can the current IReader read the file?
464 * @param string $pFilename
468 public function canRead($pFilename)
470 // Check if file exists
472 $this->openFile($pFilename);
473 } catch (Exception
$e) {
477 fclose($this->fileHandle
);
479 $type = mime_content_type($pFilename);
486 return in_array($type, $supportedTypes, true);