updated a couple packages (#1567)
[openemr.git] / vendor / phpoffice / phpspreadsheet / src / PhpSpreadsheet / Reader / Csv.php
blob6adb6ee66e02b3448657ed970f82df68fce2ca05
1 <?php
3 namespace PhpOffice\PhpSpreadsheet\Reader;
5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6 use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7 use PhpOffice\PhpSpreadsheet\Spreadsheet;
9 class Csv extends BaseReader
11 /**
12 * Input encoding.
14 * @var string
16 private $inputEncoding = 'UTF-8';
18 /**
19 * Delimiter.
21 * @var string
23 private $delimiter;
25 /**
26 * Enclosure.
28 * @var string
30 private $enclosure = '"';
32 /**
33 * Sheet index to read.
35 * @var int
37 private $sheetIndex = 0;
39 /**
40 * Load rows contiguously.
42 * @var bool
44 private $contiguous = false;
46 /**
47 * Row counter for loading rows contiguously.
49 * @var int
51 private $contiguousRow = -1;
53 /**
54 * Create a new CSV Reader instance.
56 public function __construct()
58 $this->readFilter = new DefaultReadFilter();
61 /**
62 * Set input encoding.
64 * @param string $pValue Input encoding, eg: 'UTF-8'
66 * @return Csv
68 public function setInputEncoding($pValue)
70 $this->inputEncoding = $pValue;
72 return $this;
75 /**
76 * Get input encoding.
78 * @return string
80 public function getInputEncoding()
82 return $this->inputEncoding;
85 /**
86 * Move filepointer past any BOM marker.
88 protected function skipBOM()
90 rewind($this->fileHandle);
92 switch ($this->inputEncoding) {
93 case 'UTF-8':
94 fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
95 fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
97 break;
98 case 'UTF-16LE':
99 fgets($this->fileHandle, 3) == "\xFF\xFE" ?
100 fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
102 break;
103 case 'UTF-16BE':
104 fgets($this->fileHandle, 3) == "\xFE\xFF" ?
105 fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
107 break;
108 case 'UTF-32LE':
109 fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ?
110 fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
112 break;
113 case 'UTF-32BE':
114 fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ?
115 fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
117 break;
118 default:
119 break;
124 * Identify any separator that is explicitly set in the file.
126 protected function checkSeparator()
128 $line = fgets($this->fileHandle);
129 if ($line === false) {
130 return;
133 if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
134 $this->delimiter = substr($line, 4, 1);
136 return;
139 return $this->skipBOM();
143 * Infer the separator if it isn't explicitly set in the file or specified by the user.
145 protected function inferSeparator()
147 if ($this->delimiter !== null) {
148 return;
151 $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
152 $counts = [];
153 foreach ($potentialDelimiters as $delimiter) {
154 $counts[$delimiter] = [];
157 // Count how many times each of the potential delimiters appears in each line
158 $numberLines = 0;
159 while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) {
160 // Drop everything that is enclosed to avoid counting false positives in enclosures
161 $enclosure = preg_quote($this->enclosure, '/');
162 $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
164 $countLine = [];
165 for ($i = strlen($line) - 1; $i >= 0; --$i) {
166 $char = $line[$i];
167 if (isset($counts[$char])) {
168 if (!isset($countLine[$char])) {
169 $countLine[$char] = 0;
171 ++$countLine[$char];
174 foreach ($potentialDelimiters as $delimiter) {
175 $counts[$delimiter][] = isset($countLine[$delimiter])
176 ? $countLine[$delimiter]
177 : 0;
181 // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
182 $meanSquareDeviations = [];
183 $middleIdx = floor(($numberLines - 1) / 2);
185 foreach ($potentialDelimiters as $delimiter) {
186 $series = $counts[$delimiter];
187 sort($series);
189 $median = ($numberLines % 2)
190 ? $series[$middleIdx]
191 : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;
193 if ($median === 0) {
194 continue;
197 $meanSquareDeviations[$delimiter] = array_reduce(
198 $series,
199 function ($sum, $value) use ($median) {
200 return $sum + pow($value - $median, 2);
202 ) / count($series);
205 // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
206 $min = INF;
207 foreach ($potentialDelimiters as $delimiter) {
208 if (!isset($meanSquareDeviations[$delimiter])) {
209 continue;
212 if ($meanSquareDeviations[$delimiter] < $min) {
213 $min = $meanSquareDeviations[$delimiter];
214 $this->delimiter = $delimiter;
218 // If no delimiter could be detected, fall back to the default
219 if ($this->delimiter === null) {
220 $this->delimiter = reset($potentialDelimiters);
223 return $this->skipBOM();
227 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
229 * @param string $pFilename
231 * @throws Exception
233 * @return array
235 public function listWorksheetInfo($pFilename)
237 // Open file
238 if (!$this->canRead($pFilename)) {
239 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
241 $this->openFile($pFilename);
242 $fileHandle = $this->fileHandle;
244 // Skip BOM, if any
245 $this->skipBOM();
246 $this->checkSeparator();
247 $this->inferSeparator();
249 $worksheetInfo = [];
250 $worksheetInfo[0]['worksheetName'] = 'Worksheet';
251 $worksheetInfo[0]['lastColumnLetter'] = 'A';
252 $worksheetInfo[0]['lastColumnIndex'] = 0;
253 $worksheetInfo[0]['totalRows'] = 0;
254 $worksheetInfo[0]['totalColumns'] = 0;
256 // Loop through each line of the file in turn
257 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
258 ++$worksheetInfo[0]['totalRows'];
259 $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
262 $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
263 $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
265 // Close file
266 fclose($fileHandle);
268 return $worksheetInfo;
272 * Loads Spreadsheet from file.
274 * @param string $pFilename
276 * @throws Exception
278 * @return Spreadsheet
280 public function load($pFilename)
282 // Create new Spreadsheet
283 $spreadsheet = new Spreadsheet();
285 // Load into this instance
286 return $this->loadIntoExisting($pFilename, $spreadsheet);
290 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
292 * @param string $pFilename
293 * @param Spreadsheet $spreadsheet
295 * @throws Exception
297 * @return Spreadsheet
299 public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
301 $lineEnding = ini_get('auto_detect_line_endings');
302 ini_set('auto_detect_line_endings', true);
304 // Open file
305 if (!$this->canRead($pFilename)) {
306 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
308 $this->openFile($pFilename);
309 $fileHandle = $this->fileHandle;
311 // Skip BOM, if any
312 $this->skipBOM();
313 $this->checkSeparator();
314 $this->inferSeparator();
316 // Create new PhpSpreadsheet object
317 while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
318 $spreadsheet->createSheet();
320 $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
322 // Set our starting row based on whether we're in contiguous mode or not
323 $currentRow = 1;
324 if ($this->contiguous) {
325 $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
328 // Loop through each line of the file in turn
329 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
330 $columnLetter = 'A';
331 foreach ($rowData as $rowDatum) {
332 if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
333 // Convert encoding if necessary
334 if ($this->inputEncoding !== 'UTF-8') {
335 $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
338 // Set cell value
339 $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
341 ++$columnLetter;
343 ++$currentRow;
346 // Close file
347 fclose($fileHandle);
349 if ($this->contiguous) {
350 $this->contiguousRow = $currentRow;
353 ini_set('auto_detect_line_endings', $lineEnding);
355 // Return
356 return $spreadsheet;
360 * Get delimiter.
362 * @return string
364 public function getDelimiter()
366 return $this->delimiter;
370 * Set delimiter.
372 * @param string $delimiter Delimiter, eg: ','
374 * @return CSV
376 public function setDelimiter($delimiter)
378 $this->delimiter = $delimiter;
380 return $this;
384 * Get enclosure.
386 * @return string
388 public function getEnclosure()
390 return $this->enclosure;
394 * Set enclosure.
396 * @param string $enclosure Enclosure, defaults to "
398 * @return CSV
400 public function setEnclosure($enclosure)
402 if ($enclosure == '') {
403 $enclosure = '"';
405 $this->enclosure = $enclosure;
407 return $this;
411 * Get sheet index.
413 * @return int
415 public function getSheetIndex()
417 return $this->sheetIndex;
421 * Set sheet index.
423 * @param int $pValue Sheet index
425 * @return CSV
427 public function setSheetIndex($pValue)
429 $this->sheetIndex = $pValue;
431 return $this;
435 * Set Contiguous.
437 * @param bool $contiguous
439 * @return Csv
441 public function setContiguous($contiguous)
443 $this->contiguous = (bool) $contiguous;
444 if (!$contiguous) {
445 $this->contiguousRow = -1;
448 return $this;
452 * Get Contiguous.
454 * @return bool
456 public function getContiguous()
458 return $this->contiguous;
462 * Can the current IReader read the file?
464 * @param string $pFilename
466 * @return bool
468 public function canRead($pFilename)
470 // Check if file exists
471 try {
472 $this->openFile($pFilename);
473 } catch (Exception $e) {
474 return false;
477 fclose($this->fileHandle);
479 $type = mime_content_type($pFilename);
480 $supportedTypes = [
481 'text/csv',
482 'text/plain',
483 'inode/x-empty',
486 return in_array($type, $supportedTypes, true);