composer package updates
[openemr.git] / vendor / phpoffice / phpspreadsheet / src / PhpSpreadsheet / Reader / Csv.php
blobeaece1d2510e6e9f7baab131cf6ae2a5284e272b
1 <?php
3 namespace PhpOffice\PhpSpreadsheet\Reader;
5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6 use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7 use PhpOffice\PhpSpreadsheet\Spreadsheet;
/**
 * Reader that loads delimiter-separated text (CSV) files into a Spreadsheet.
 *
 * The delimiter may be set explicitly, announced by the file itself through a
 * leading "sep=X" line, or inferred from the distribution of candidate
 * characters across the first lines of the file.
 */
class Csv extends BaseReader
{
    /**
     * Input encoding.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Delimiter; null until it is set explicitly, read from a "sep=" line or inferred.
     *
     * @var null|string
     */
    private $delimiter;

    /**
     * Enclosure.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Sheet index to read.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Load rows contiguously.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * Row counter for loading rows contiguously (-1 means "not started yet").
     *
     * @var int
     */
    private $contiguousRow = -1;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Create a new CSV Reader instance.
     */
    public function __construct()
    {
        $this->readFilter = new DefaultReadFilter();
    }

    /**
     * Set input encoding.
     *
     * @param string $pValue Input encoding, eg: 'UTF-8'
     *
     * @return Csv
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }

    /**
     * Get input encoding.
     *
     * @return string
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }

    /**
     * Rewind the file and move the file pointer past any BOM marker that
     * matches the configured input encoding.
     *
     * For encodings without a known BOM the pointer is simply left at the
     * start of the file.
     */
    protected function skipBOM()
    {
        // Byte order marks, keyed by the input encoding they belong to.
        $byteOrderMarks = [
            'UTF-8' => "\xEF\xBB\xBF",
            'UTF-16LE' => "\xFF\xFE",
            'UTF-16BE' => "\xFE\xFF",
            'UTF-32LE' => "\xFF\xFE\x00\x00",
            'UTF-32BE' => "\x00\x00\xFE\xFF",
        ];

        rewind($this->fileHandle);

        if (!isset($byteOrderMarks[$this->inputEncoding])) {
            return;
        }

        $bom = $byteOrderMarks[$this->inputEncoding];
        $bomLength = strlen($bom);

        // fread() is a plain byte read; a line-oriented read is not needed to compare raw bytes.
        $leadingBytes = fread($this->fileHandle, $bomLength);

        fseek($this->fileHandle, ($leadingBytes === $bom) ? $bomLength : 0);
    }

    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of the form "sep=X" (exactly five characters, ignoring the
     * line ending) sets the delimiter to X and leaves the file pointer after
     * that line. Any other first line resets the pointer via skipBOM().
     */
    protected function checkSeparator()
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * Up to 1000 lines are sampled from the current file position. For each
     * candidate delimiter the per-line occurrence counts are compared against
     * their median; the candidate with the smallest mean square deviation
     * wins. The file pointer is reset via skipBOM() afterwards.
     */
    protected function inferSeparator()
    {
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
        $maxLines = 1000;

        $counts = [];
        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter] = [];
        }

        // The enclosure does not change while sampling, so build the pattern once.
        $enclosure = preg_quote($this->enclosure, '/');
        $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/U';

        // Count how many times each of the potential delimiters appears in each line.
        // The limit is tested before reading so that $numberLines always equals the
        // number of lines that were actually sampled.
        $numberLines = 0;
        while ($numberLines < $maxLines && ($line = fgets($this->fileHandle)) !== false) {
            ++$numberLines;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $line = (string) preg_replace($enclosedPattern, '', $line);

            foreach ($potentialDelimiters as $delimiter) {
                $counts[$delimiter][] = substr_count($line, $delimiter);
            }
        }

        // Nothing to sample (e.g. an empty file): fall back to the default
        // instead of computing a median over empty series.
        if ($numberLines === 0) {
            $this->delimiter = reset($potentialDelimiters);
            $this->skipBOM();

            return;
        }

        // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];
        $middleIdx = (int) floor(($numberLines - 1) / 2);

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = ($numberLines % 2)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            if ($median === 0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                function ($sum, $value) use ($median) {
                    return $sum + pow($value - $median, 2);
                },
                0
            ) / count($series);
        }

        // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
        $min = INF;
        foreach ($potentialDelimiters as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = reset($potentialDelimiters);
        }

        $this->skipBOM();
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * @param string $pFilename
     *
     * @throws Exception when canRead() rejects the file
     *
     * @return array
     */
    public function listWorksheetInfo($pFilename)
    {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        $worksheetInfo = [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => 'A',
                'lastColumnIndex' => 0,
                'totalRows' => 0,
                'totalColumns' => 0,
            ],
        ];

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Loop through each line of the file in turn
            while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                ++$worksheetInfo[0]['totalRows'];
                $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
            }
        } finally {
            // Close file, even when reading fails part-way through
            fclose($fileHandle);
        }

        $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
        $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

        return $worksheetInfo;
    }

    /**
     * Loads Spreadsheet from file.
     *
     * @param string $pFilename
     *
     * @throws Exception
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadIntoExisting($pFilename, $spreadsheet);
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * Cells are written to the sheet selected by the configured sheet index;
     * missing sheets are created first. Empty fields are skipped, and values
     * are converted to UTF-8 when the input encoding differs.
     *
     * @param string $pFilename
     * @param Spreadsheet $spreadsheet
     *
     * @throws Exception when canRead() rejects the file
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', true);

        try {
            // Open file
            if (!$this->canRead($pFilename)) {
                throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
            }
            $this->openFile($pFilename);
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Create new PhpSpreadsheet object
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // Set our starting row based on whether we're in contiguous mode or not
                $currentRow = 1;
                if ($this->contiguous) {
                    $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
                }

                // Loop through each line of the file in turn
                while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        // Loose comparison on purpose: it skips both '' and the null that fgetcsv() yields for blank lines
                        if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            // Convert encoding if necessary
                            if ($this->inputEncoding !== 'UTF-8') {
                                $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                            }

                            // Set cell value
                            $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                        }
                        ++$columnLetter;
                    }
                    ++$currentRow;
                }

                // Remember where the next contiguous load has to continue
                if ($this->contiguous) {
                    $this->contiguousRow = $currentRow;
                }
            } finally {
                // Close file, even when reading fails part-way through
                fclose($fileHandle);
            }
        } finally {
            // Always restore the caller's ini setting, including on the exception paths
            ini_set('auto_detect_line_endings', $lineEnding);
        }

        // Return
        return $spreadsheet;
    }

    /**
     * Get delimiter.
     *
     * @return null|string null while no delimiter has been set or detected
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }

    /**
     * Set delimiter.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return Csv
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    /**
     * Get enclosure.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }

    /**
     * Set enclosure.
     *
     * @param string $enclosure Enclosure, defaults to " (also used when an empty value is passed)
     *
     * @return Csv
     */
    public function setEnclosure($enclosure)
    {
        if ($enclosure == '') {
            $enclosure = '"';
        }
        $this->enclosure = $enclosure;

        return $this;
    }

    /**
     * Get sheet index.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }

    /**
     * Set sheet index.
     *
     * @param int $pValue Sheet index
     *
     * @return Csv
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }

    /**
     * Set Contiguous.
     *
     * Switching contiguous mode off also resets the contiguous row counter.
     *
     * @param bool $contiguous
     *
     * @return Csv
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;
        if (!$contiguous) {
            $this->contiguousRow = -1;
        }

        return $this;
    }

    /**
     * Get Contiguous.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }

    /**
     * Set the character that can escape the enclosure.
     *
     * @param string $escapeCharacter
     *
     * @return $this
     */
    public function setEscapeCharacter($escapeCharacter)
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    /**
     * Get the character that can escape the enclosure.
     *
     * @return string
     */
    public function getEscapeCharacter()
    {
        return $this->escapeCharacter;
    }

    /**
     * Can the current IReader read the file?
     *
     * The file must be openable and its detected MIME type must be one of
     * text/csv, text/plain or inode/x-empty.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // Check if file exists
        try {
            $this->openFile($pFilename);
        } catch (Exception $e) {
            return false;
        }

        fclose($this->fileHandle);

        $type = mime_content_type($pFilename);
        $supportedTypes = [
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array($type, $supportedTypes, true);
    }
}