composer package updates
[openemr.git] / vendor / phpoffice / phpspreadsheet / src / PhpSpreadsheet / Reader / Ods.php
blobec0716dfd05c0f3dad46d2c37d875b5bab309aa4
1 <?php
3 namespace PhpOffice\PhpSpreadsheet\Reader;
5 use DateTime;
6 use DateTimeZone;
7 use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
8 use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
9 use PhpOffice\PhpSpreadsheet\Cell\DataType;
10 use PhpOffice\PhpSpreadsheet\Document\Properties;
11 use PhpOffice\PhpSpreadsheet\RichText\RichText;
12 use PhpOffice\PhpSpreadsheet\Settings;
13 use PhpOffice\PhpSpreadsheet\Shared\Date;
14 use PhpOffice\PhpSpreadsheet\Shared\File;
15 use PhpOffice\PhpSpreadsheet\Spreadsheet;
16 use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
17 use XMLReader;
18 use ZipArchive;
20 class Ods extends BaseReader
22 /**
23 * Create a new Ods Reader instance.
25 public function __construct()
27 $this->readFilter = new DefaultReadFilter();
30 /**
31 * Can the current IReader read the file?
33 * @param string $pFilename
35 * @throws Exception
37 * @return bool
39 public function canRead($pFilename)
41 File::assertFile($pFilename);
43 $mimeType = 'UNKNOWN';
45 // Load file
47 $zip = new ZipArchive();
48 if ($zip->open($pFilename) === true) {
49 // check if it is an OOXML archive
50 $stat = $zip->statName('mimetype');
51 if ($stat && ($stat['size'] <= 255)) {
52 $mimeType = $zip->getFromName($stat['name']);
53 } elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
54 $xml = simplexml_load_string(
55 $this->securityScan($zip->getFromName('META-INF/manifest.xml')),
56 'SimpleXMLElement',
57 Settings::getLibXmlLoaderOptions()
59 $namespacesContent = $xml->getNamespaces(true);
60 if (isset($namespacesContent['manifest'])) {
61 $manifest = $xml->children($namespacesContent['manifest']);
62 foreach ($manifest as $manifestDataSet) {
63 $manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']);
64 if ($manifestAttributes->{'full-path'} == '/') {
65 $mimeType = (string) $manifestAttributes->{'media-type'};
67 break;
73 $zip->close();
75 return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet';
78 return false;
81 /**
82 * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
84 * @param string $pFilename
86 * @throws Exception
88 * @return string[]
90 public function listWorksheetNames($pFilename)
92 File::assertFile($pFilename);
94 $zip = new ZipArchive();
95 if (!$zip->open($pFilename)) {
96 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
99 $worksheetNames = [];
101 $xml = new XMLReader();
102 $xml->xml(
103 $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
104 null,
105 Settings::getLibXmlLoaderOptions()
107 $xml->setParserProperty(2, true);
109 // Step into the first level of content of the XML
110 $xml->read();
111 while ($xml->read()) {
112 // Quickly jump through to the office:body node
113 while ($xml->name !== 'office:body') {
114 if ($xml->isEmptyElement) {
115 $xml->read();
116 } else {
117 $xml->next();
120 // Now read each node until we find our first table:table node
121 while ($xml->read()) {
122 if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
123 // Loop through each table:table node reading the table:name attribute for each worksheet name
124 do {
125 $worksheetNames[] = $xml->getAttribute('table:name');
126 $xml->next();
127 } while ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT);
132 return $worksheetNames;
136 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
138 * @param string $pFilename
140 * @throws Exception
142 * @return array
144 public function listWorksheetInfo($pFilename)
146 File::assertFile($pFilename);
148 $worksheetInfo = [];
150 $zip = new ZipArchive();
151 if (!$zip->open($pFilename)) {
152 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
155 $xml = new XMLReader();
156 $xml->xml(
157 $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
158 null,
159 Settings::getLibXmlLoaderOptions()
161 $xml->setParserProperty(2, true);
163 // Step into the first level of content of the XML
164 $xml->read();
165 while ($xml->read()) {
166 // Quickly jump through to the office:body node
167 while ($xml->name !== 'office:body') {
168 if ($xml->isEmptyElement) {
169 $xml->read();
170 } else {
171 $xml->next();
174 // Now read each node until we find our first table:table node
175 while ($xml->read()) {
176 if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
177 $worksheetNames[] = $xml->getAttribute('table:name');
179 $tmpInfo = [
180 'worksheetName' => $xml->getAttribute('table:name'),
181 'lastColumnLetter' => 'A',
182 'lastColumnIndex' => 0,
183 'totalRows' => 0,
184 'totalColumns' => 0,
187 // Loop through each child node of the table:table element reading
188 $currCells = 0;
189 do {
190 $xml->read();
191 if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) {
192 $rowspan = $xml->getAttribute('table:number-rows-repeated');
193 $rowspan = empty($rowspan) ? 1 : $rowspan;
194 $tmpInfo['totalRows'] += $rowspan;
195 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
196 $currCells = 0;
197 // Step into the row
198 $xml->read();
199 do {
200 if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
201 if (!$xml->isEmptyElement) {
202 ++$currCells;
203 $xml->next();
204 } else {
205 $xml->read();
207 } elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
208 $mergeSize = $xml->getAttribute('table:number-columns-repeated');
209 $currCells += (int) $mergeSize;
210 $xml->read();
212 } while ($xml->name != 'table:table-row');
214 } while ($xml->name != 'table:table');
216 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
217 $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
218 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
219 $worksheetInfo[] = $tmpInfo;
224 return $worksheetInfo;
228 * Loads PhpSpreadsheet from file.
230 * @param string $pFilename
232 * @throws Exception
234 * @return Spreadsheet
236 public function load($pFilename)
238 // Create new Spreadsheet
239 $spreadsheet = new Spreadsheet();
241 // Load into this instance
242 return $this->loadIntoExisting($pFilename, $spreadsheet);
246 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
248 * @param string $pFilename
249 * @param Spreadsheet $spreadsheet
251 * @throws Exception
253 * @return Spreadsheet
255 public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
257 File::assertFile($pFilename);
259 $timezoneObj = new DateTimeZone('Europe/London');
260 $GMT = new \DateTimeZone('UTC');
262 $zip = new ZipArchive();
263 if (!$zip->open($pFilename)) {
264 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
267 // Meta
269 $xml = simplexml_load_string(
270 $this->securityScan($zip->getFromName('meta.xml')),
271 'SimpleXMLElement',
272 Settings::getLibXmlLoaderOptions()
274 $namespacesMeta = $xml->getNamespaces(true);
276 $docProps = $spreadsheet->getProperties();
277 $officeProperty = $xml->children($namespacesMeta['office']);
278 foreach ($officeProperty as $officePropertyData) {
279 $officePropertyDC = [];
280 if (isset($namespacesMeta['dc'])) {
281 $officePropertyDC = $officePropertyData->children($namespacesMeta['dc']);
283 foreach ($officePropertyDC as $propertyName => $propertyValue) {
284 $propertyValue = (string) $propertyValue;
285 switch ($propertyName) {
286 case 'title':
287 $docProps->setTitle($propertyValue);
289 break;
290 case 'subject':
291 $docProps->setSubject($propertyValue);
293 break;
294 case 'creator':
295 $docProps->setCreator($propertyValue);
296 $docProps->setLastModifiedBy($propertyValue);
298 break;
299 case 'date':
300 $creationDate = strtotime($propertyValue);
301 $docProps->setCreated($creationDate);
302 $docProps->setModified($creationDate);
304 break;
305 case 'description':
306 $docProps->setDescription($propertyValue);
308 break;
311 $officePropertyMeta = [];
312 if (isset($namespacesMeta['dc'])) {
313 $officePropertyMeta = $officePropertyData->children($namespacesMeta['meta']);
315 foreach ($officePropertyMeta as $propertyName => $propertyValue) {
316 $propertyValueAttributes = $propertyValue->attributes($namespacesMeta['meta']);
317 $propertyValue = (string) $propertyValue;
318 switch ($propertyName) {
319 case 'initial-creator':
320 $docProps->setCreator($propertyValue);
322 break;
323 case 'keyword':
324 $docProps->setKeywords($propertyValue);
326 break;
327 case 'creation-date':
328 $creationDate = strtotime($propertyValue);
329 $docProps->setCreated($creationDate);
331 break;
332 case 'user-defined':
333 $propertyValueType = Properties::PROPERTY_TYPE_STRING;
334 foreach ($propertyValueAttributes as $key => $value) {
335 if ($key == 'name') {
336 $propertyValueName = (string) $value;
337 } elseif ($key == 'value-type') {
338 switch ($value) {
339 case 'date':
340 $propertyValue = Properties::convertProperty($propertyValue, 'date');
341 $propertyValueType = Properties::PROPERTY_TYPE_DATE;
343 break;
344 case 'boolean':
345 $propertyValue = Properties::convertProperty($propertyValue, 'bool');
346 $propertyValueType = Properties::PROPERTY_TYPE_BOOLEAN;
348 break;
349 case 'float':
350 $propertyValue = Properties::convertProperty($propertyValue, 'r4');
351 $propertyValueType = Properties::PROPERTY_TYPE_FLOAT;
353 break;
354 default:
355 $propertyValueType = Properties::PROPERTY_TYPE_STRING;
359 $docProps->setCustomProperty($propertyValueName, $propertyValue, $propertyValueType);
361 break;
366 // Content
368 $dom = new \DOMDocument('1.01', 'UTF-8');
369 $dom->loadXML(
370 $this->securityScan($zip->getFromName('content.xml')),
371 Settings::getLibXmlLoaderOptions()
374 $officeNs = $dom->lookupNamespaceUri('office');
375 $tableNs = $dom->lookupNamespaceUri('table');
376 $textNs = $dom->lookupNamespaceUri('text');
377 $xlinkNs = $dom->lookupNamespaceUri('xlink');
379 $spreadsheets = $dom->getElementsByTagNameNS($officeNs, 'body')
380 ->item(0)
381 ->getElementsByTagNameNS($officeNs, 'spreadsheet');
383 foreach ($spreadsheets as $workbookData) {
384 /** @var \DOMElement $workbookData */
385 $tables = $workbookData->getElementsByTagNameNS($tableNs, 'table');
387 $worksheetID = 0;
388 foreach ($tables as $worksheetDataSet) {
389 /** @var \DOMElement $worksheetDataSet */
390 $worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name');
392 // Check loadSheetsOnly
393 if (isset($this->loadSheetsOnly)
394 && $worksheetName
395 && !in_array($worksheetName, $this->loadSheetsOnly)) {
396 continue;
399 // Create sheet
400 if ($worksheetID > 0) {
401 $spreadsheet->createSheet(); // First sheet is added by default
403 $spreadsheet->setActiveSheetIndex($worksheetID);
405 if ($worksheetName) {
406 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
407 // formula cells... during the load, all formulae should be correct, and we're simply
408 // bringing the worksheet name in line with the formula, not the reverse
409 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
412 // Go through every child of table element
413 $rowID = 1;
414 foreach ($worksheetDataSet->childNodes as $childNode) {
415 /** @var \DOMElement $childNode */
417 // Filter elements which are not under the "table" ns
418 if ($childNode->namespaceURI != $tableNs) {
419 continue;
422 $key = $childNode->nodeName;
424 // Remove ns from node name
425 if (strpos($key, ':') !== false) {
426 $keyChunks = explode(':', $key);
427 $key = array_pop($keyChunks);
430 switch ($key) {
431 case 'table-header-rows':
432 /// TODO :: Figure this out. This is only a partial implementation I guess.
433 // ($rowData it's not used at all and I'm not sure that PHPExcel
434 // has an API for this)
436 // foreach ($rowData as $keyRowData => $cellData) {
437 // $rowData = $cellData;
438 // break;
439 // }
440 break;
441 case 'table-row':
442 if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) {
443 $rowRepeats = $childNode->getAttributeNS($tableNs, 'number-rows-repeated');
444 } else {
445 $rowRepeats = 1;
448 $columnID = 'A';
449 foreach ($childNode->childNodes as $key => $cellData) {
450 // @var \DOMElement $cellData
452 if ($this->getReadFilter() !== null) {
453 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
454 ++$columnID;
456 continue;
460 // Initialize variables
461 $formatting = $hyperlink = null;
462 $hasCalculatedValue = false;
463 $cellDataFormula = '';
465 if ($cellData->hasAttributeNS($tableNs, 'formula')) {
466 $cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula');
467 $hasCalculatedValue = true;
470 // Annotations
471 $annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation');
473 if ($annotation->length > 0) {
474 $textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p');
476 if ($textNode->length > 0) {
477 $text = $this->scanElementForText($textNode->item(0));
479 $spreadsheet->getActiveSheet()
480 ->getComment($columnID . $rowID)
481 ->setText($this->parseRichText($text));
482 // ->setAuthor( $author )
486 // Content
488 /** @var \DOMElement[] $paragraphs */
489 $paragraphs = [];
491 foreach ($cellData->childNodes as $item) {
492 /** @var \DOMElement $item */
494 // Filter text:p elements
495 if ($item->nodeName == 'text:p') {
496 $paragraphs[] = $item;
500 if (count($paragraphs) > 0) {
501 // Consolidate if there are multiple p records (maybe with spans as well)
502 $dataArray = [];
504 // Text can have multiple text:p and within those, multiple text:span.
505 // text:p newlines, but text:span does not.
506 // Also, here we assume there is no text data is span fields are specified, since
507 // we have no way of knowing proper positioning anyway.
509 foreach ($paragraphs as $pData) {
510 $dataArray[] = $this->scanElementForText($pData);
512 $allCellDataText = implode($dataArray, "\n");
514 $type = $cellData->getAttributeNS($officeNs, 'value-type');
516 switch ($type) {
517 case 'string':
518 $type = DataType::TYPE_STRING;
519 $dataValue = $allCellDataText;
521 foreach ($paragraphs as $paragraph) {
522 $link = $paragraph->getElementsByTagNameNS($textNs, 'a');
523 if ($link->length > 0) {
524 $hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href');
528 break;
529 case 'boolean':
530 $type = DataType::TYPE_BOOL;
531 $dataValue = ($allCellDataText == 'TRUE') ? true : false;
533 break;
534 case 'percentage':
535 $type = DataType::TYPE_NUMERIC;
536 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
538 if (floor($dataValue) == $dataValue) {
539 $dataValue = (int) $dataValue;
541 $formatting = NumberFormat::FORMAT_PERCENTAGE_00;
543 break;
544 case 'currency':
545 $type = DataType::TYPE_NUMERIC;
546 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
548 if (floor($dataValue) == $dataValue) {
549 $dataValue = (int) $dataValue;
551 $formatting = NumberFormat::FORMAT_CURRENCY_USD_SIMPLE;
553 break;
554 case 'float':
555 $type = DataType::TYPE_NUMERIC;
556 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
558 if (floor($dataValue) == $dataValue) {
559 if ($dataValue == (int) $dataValue) {
560 $dataValue = (int) $dataValue;
561 } else {
562 $dataValue = (float) $dataValue;
566 break;
567 case 'date':
568 $type = DataType::TYPE_NUMERIC;
569 $value = $cellData->getAttributeNS($officeNs, 'date-value');
571 $dateObj = new DateTime($value, $GMT);
572 $dateObj->setTimeZone($timezoneObj);
573 list($year, $month, $day, $hour, $minute, $second) = explode(
574 ' ',
575 $dateObj->format('Y m d H i s')
578 $dataValue = Date::formattedPHPToExcel(
579 $year,
580 $month,
581 $day,
582 $hour,
583 $minute,
584 $second
587 if ($dataValue != floor($dataValue)) {
588 $formatting = NumberFormat::FORMAT_DATE_XLSX15
589 . ' '
590 . NumberFormat::FORMAT_DATE_TIME4;
591 } else {
592 $formatting = NumberFormat::FORMAT_DATE_XLSX15;
595 break;
596 case 'time':
597 $type = DataType::TYPE_NUMERIC;
599 $timeValue = $cellData->getAttributeNS($officeNs, 'time-value');
601 $dataValue = Date::PHPToExcel(
602 strtotime(
603 '01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS'))
606 $formatting = NumberFormat::FORMAT_DATE_TIME4;
608 break;
609 default:
610 $dataValue = null;
612 } else {
613 $type = DataType::TYPE_NULL;
614 $dataValue = null;
617 if ($hasCalculatedValue) {
618 $type = DataType::TYPE_FORMULA;
619 $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1);
620 $temp = explode('"', $cellDataFormula);
621 $tKey = false;
622 foreach ($temp as &$value) {
623 // Only replace in alternate array entries (i.e. non-quoted blocks)
624 if ($tKey = !$tKey) {
625 // Cell range reference in another sheet
626 $value = preg_replace('/\[([^\.]+)\.([^\.]+):\.([^\.]+)\]/U', '$1!$2:$3', $value);
628 // Cell reference in another sheet
629 $value = preg_replace('/\[([^\.]+)\.([^\.]+)\]/U', '$1!$2', $value);
631 // Cell range reference
632 $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/U', '$1:$2', $value);
634 // Simple cell reference
635 $value = preg_replace('/\[\.([^\.]+)\]/U', '$1', $value);
637 $value = Calculation::translateSeparator(';', ',', $value, $inBraces);
640 unset($value);
642 // Then rebuild the formula string
643 $cellDataFormula = implode('"', $temp);
646 if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) {
647 $colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated');
648 } else {
649 $colRepeats = 1;
652 if ($type !== null) {
653 for ($i = 0; $i < $colRepeats; ++$i) {
654 if ($i > 0) {
655 ++$columnID;
658 if ($type !== DataType::TYPE_NULL) {
659 for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++$rowAdjust) {
660 $rID = $rowID + $rowAdjust;
662 $cell = $spreadsheet->getActiveSheet()
663 ->getCell($columnID . $rID);
665 // Set value
666 if ($hasCalculatedValue) {
667 $cell->setValueExplicit($cellDataFormula, $type);
668 } else {
669 $cell->setValueExplicit($dataValue, $type);
672 if ($hasCalculatedValue) {
673 $cell->setCalculatedValue($dataValue);
676 // Set other properties
677 if ($formatting !== null) {
678 $spreadsheet->getActiveSheet()
679 ->getStyle($columnID . $rID)
680 ->getNumberFormat()
681 ->setFormatCode($formatting);
682 } else {
683 $spreadsheet->getActiveSheet()
684 ->getStyle($columnID . $rID)
685 ->getNumberFormat()
686 ->setFormatCode(NumberFormat::FORMAT_GENERAL);
689 if ($hyperlink !== null) {
690 $cell->getHyperlink()
691 ->setUrl($hyperlink);
698 // Merged cells
699 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')
700 || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned')
702 if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) {
703 $columnTo = $columnID;
705 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) {
706 $columnIndex = Coordinate::columnIndexFromString($columnID);
707 $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned');
708 $columnIndex -= 2;
710 $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1);
713 $rowTo = $rowID;
715 if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) {
716 $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1;
719 $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo;
720 $spreadsheet->getActiveSheet()->mergeCells($cellRange);
724 ++$columnID;
726 $rowID += $rowRepeats;
728 break;
731 ++$worksheetID;
735 // Return
736 return $spreadsheet;
740 * Recursively scan element.
742 * @param \DOMNode $element
744 * @return string
746 protected function scanElementForText(\DOMNode $element)
748 $str = '';
749 foreach ($element->childNodes as $child) {
750 /** @var \DOMNode $child */
751 if ($child->nodeType == XML_TEXT_NODE) {
752 $str .= $child->nodeValue;
753 } elseif ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName == 'text:s') {
754 // It's a space
756 // Multiple spaces?
757 /** @var \DOMAttr $cAttr */
758 $cAttr = $child->attributes->getNamedItem('c');
759 if ($cAttr) {
760 $multiplier = (int) $cAttr->nodeValue;
761 } else {
762 $multiplier = 1;
765 $str .= str_repeat(' ', $multiplier);
768 if ($child->hasChildNodes()) {
769 $str .= $this->scanElementForText($child);
773 return $str;
777 * @param string $is
779 * @return RichText
781 private function parseRichText($is)
783 $value = new RichText();
784 $value->createText($is);
786 return $value;