3 namespace PhpOffice\PhpSpreadsheet\Reader
;
7 use PhpOffice\PhpSpreadsheet\Calculation\Calculation
;
8 use PhpOffice\PhpSpreadsheet\Cell\Coordinate
;
9 use PhpOffice\PhpSpreadsheet\Cell\DataType
;
10 use PhpOffice\PhpSpreadsheet\Document\Properties
;
11 use PhpOffice\PhpSpreadsheet\RichText\RichText
;
12 use PhpOffice\PhpSpreadsheet\Settings
;
13 use PhpOffice\PhpSpreadsheet\Shared\Date
;
14 use PhpOffice\PhpSpreadsheet\Shared\File
;
15 use PhpOffice\PhpSpreadsheet\Spreadsheet
;
16 use PhpOffice\PhpSpreadsheet\Style\NumberFormat
;
20 class Ods
extends BaseReader
23 * Create a new Ods Reader instance.
25 public function __construct()
27 $this->readFilter
= new DefaultReadFilter();
31 * Can the current IReader read the file?
33 * @param string $pFilename
39 public function canRead($pFilename)
41 File
::assertFile($pFilename);
43 $mimeType = 'UNKNOWN';
47 $zip = new ZipArchive();
48 if ($zip->open($pFilename) === true) {
49 // check if it is an OOXML archive
50 $stat = $zip->statName('mimetype');
51 if ($stat && ($stat['size'] <= 255)) {
52 $mimeType = $zip->getFromName($stat['name']);
53 } elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
54 $xml = simplexml_load_string(
55 $this->securityScan($zip->getFromName('META-INF/manifest.xml')),
57 Settings
::getLibXmlLoaderOptions()
59 $namespacesContent = $xml->getNamespaces(true);
60 if (isset($namespacesContent['manifest'])) {
61 $manifest = $xml->children($namespacesContent['manifest']);
62 foreach ($manifest as $manifestDataSet) {
63 $manifestAttributes = $manifestDataSet->attributes($namespacesContent['manifest']);
64 if ($manifestAttributes->{'full-path'} == '/') {
65 $mimeType = (string) $manifestAttributes->{'media-type'};
75 return $mimeType === 'application/vnd.oasis.opendocument.spreadsheet';
82 * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
84 * @param string $pFilename
90 public function listWorksheetNames($pFilename)
92 File
::assertFile($pFilename);
94 $zip = new ZipArchive();
95 if (!$zip->open($pFilename)) {
96 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
101 $xml = new XMLReader();
103 $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
105 Settings
::getLibXmlLoaderOptions()
107 $xml->setParserProperty(2, true);
109 // Step into the first level of content of the XML
111 while ($xml->read()) {
112 // Quickly jump through to the office:body node
113 while ($xml->name
!== 'office:body') {
114 if ($xml->isEmptyElement
) {
120 // Now read each node until we find our first table:table node
121 while ($xml->read()) {
122 if ($xml->name
== 'table:table' && $xml->nodeType
== XMLReader
::ELEMENT
) {
123 // Loop through each table:table node reading the table:name attribute for each worksheet name
125 $worksheetNames[] = $xml->getAttribute('table:name');
127 } while ($xml->name
== 'table:table' && $xml->nodeType
== XMLReader
::ELEMENT
);
132 return $worksheetNames;
136 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
138 * @param string $pFilename
144 public function listWorksheetInfo($pFilename)
146 File
::assertFile($pFilename);
150 $zip = new ZipArchive();
151 if (!$zip->open($pFilename)) {
152 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
155 $xml = new XMLReader();
157 $this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
159 Settings
::getLibXmlLoaderOptions()
161 $xml->setParserProperty(2, true);
163 // Step into the first level of content of the XML
165 while ($xml->read()) {
166 // Quickly jump through to the office:body node
167 while ($xml->name
!== 'office:body') {
168 if ($xml->isEmptyElement
) {
174 // Now read each node until we find our first table:table node
175 while ($xml->read()) {
176 if ($xml->name
== 'table:table' && $xml->nodeType
== XMLReader
::ELEMENT
) {
177 $worksheetNames[] = $xml->getAttribute('table:name');
180 'worksheetName' => $xml->getAttribute('table:name'),
181 'lastColumnLetter' => 'A',
182 'lastColumnIndex' => 0,
187 // Loop through each child node of the table:table element reading
191 if ($xml->name
== 'table:table-row' && $xml->nodeType
== XMLReader
::ELEMENT
) {
192 $rowspan = $xml->getAttribute('table:number-rows-repeated');
193 $rowspan = empty($rowspan) ?
1 : $rowspan;
194 $tmpInfo['totalRows'] +
= $rowspan;
195 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
200 if ($xml->name
== 'table:table-cell' && $xml->nodeType
== XMLReader
::ELEMENT
) {
201 if (!$xml->isEmptyElement
) {
207 } elseif ($xml->name
== 'table:covered-table-cell' && $xml->nodeType
== XMLReader
::ELEMENT
) {
208 $mergeSize = $xml->getAttribute('table:number-columns-repeated');
209 $currCells +
= (int) $mergeSize;
212 } while ($xml->name
!= 'table:table-row');
214 } while ($xml->name
!= 'table:table');
216 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
217 $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
218 $tmpInfo['lastColumnLetter'] = Coordinate
::stringFromColumnIndex($tmpInfo['lastColumnIndex'] +
1);
219 $worksheetInfo[] = $tmpInfo;
224 return $worksheetInfo;
228 * Loads PhpSpreadsheet from file.
230 * @param string $pFilename
234 * @return Spreadsheet
236 public function load($pFilename)
238 // Create new Spreadsheet
239 $spreadsheet = new Spreadsheet();
241 // Load into this instance
242 return $this->loadIntoExisting($pFilename, $spreadsheet);
246 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
248 * @param string $pFilename
249 * @param Spreadsheet $spreadsheet
253 * @return Spreadsheet
255 public function loadIntoExisting($pFilename, Spreadsheet
$spreadsheet)
257 File
::assertFile($pFilename);
259 $timezoneObj = new DateTimeZone('Europe/London');
260 $GMT = new \
DateTimeZone('UTC');
262 $zip = new ZipArchive();
263 if (!$zip->open($pFilename)) {
264 throw new Exception('Could not open ' . $pFilename . ' for reading! Error opening file.');
269 $xml = simplexml_load_string(
270 $this->securityScan($zip->getFromName('meta.xml')),
272 Settings
::getLibXmlLoaderOptions()
274 $namespacesMeta = $xml->getNamespaces(true);
276 $docProps = $spreadsheet->getProperties();
277 $officeProperty = $xml->children($namespacesMeta['office']);
278 foreach ($officeProperty as $officePropertyData) {
279 $officePropertyDC = [];
280 if (isset($namespacesMeta['dc'])) {
281 $officePropertyDC = $officePropertyData->children($namespacesMeta['dc']);
283 foreach ($officePropertyDC as $propertyName => $propertyValue) {
284 $propertyValue = (string) $propertyValue;
285 switch ($propertyName) {
287 $docProps->setTitle($propertyValue);
291 $docProps->setSubject($propertyValue);
295 $docProps->setCreator($propertyValue);
296 $docProps->setLastModifiedBy($propertyValue);
300 $creationDate = strtotime($propertyValue);
301 $docProps->setCreated($creationDate);
302 $docProps->setModified($creationDate);
306 $docProps->setDescription($propertyValue);
311 $officePropertyMeta = [];
312 if (isset($namespacesMeta['dc'])) {
313 $officePropertyMeta = $officePropertyData->children($namespacesMeta['meta']);
315 foreach ($officePropertyMeta as $propertyName => $propertyValue) {
316 $propertyValueAttributes = $propertyValue->attributes($namespacesMeta['meta']);
317 $propertyValue = (string) $propertyValue;
318 switch ($propertyName) {
319 case 'initial-creator':
320 $docProps->setCreator($propertyValue);
324 $docProps->setKeywords($propertyValue);
327 case 'creation-date':
328 $creationDate = strtotime($propertyValue);
329 $docProps->setCreated($creationDate);
333 $propertyValueType = Properties
::PROPERTY_TYPE_STRING
;
334 foreach ($propertyValueAttributes as $key => $value) {
335 if ($key == 'name') {
336 $propertyValueName = (string) $value;
337 } elseif ($key == 'value-type') {
340 $propertyValue = Properties
::convertProperty($propertyValue, 'date');
341 $propertyValueType = Properties
::PROPERTY_TYPE_DATE
;
345 $propertyValue = Properties
::convertProperty($propertyValue, 'bool');
346 $propertyValueType = Properties
::PROPERTY_TYPE_BOOLEAN
;
350 $propertyValue = Properties
::convertProperty($propertyValue, 'r4');
351 $propertyValueType = Properties
::PROPERTY_TYPE_FLOAT
;
355 $propertyValueType = Properties
::PROPERTY_TYPE_STRING
;
359 $docProps->setCustomProperty($propertyValueName, $propertyValue, $propertyValueType);
368 $dom = new \
DOMDocument('1.01', 'UTF-8');
370 $this->securityScan($zip->getFromName('content.xml')),
371 Settings
::getLibXmlLoaderOptions()
374 $officeNs = $dom->lookupNamespaceUri('office');
375 $tableNs = $dom->lookupNamespaceUri('table');
376 $textNs = $dom->lookupNamespaceUri('text');
377 $xlinkNs = $dom->lookupNamespaceUri('xlink');
379 $spreadsheets = $dom->getElementsByTagNameNS($officeNs, 'body')
381 ->getElementsByTagNameNS($officeNs, 'spreadsheet');
383 foreach ($spreadsheets as $workbookData) {
384 /** @var \DOMElement $workbookData */
385 $tables = $workbookData->getElementsByTagNameNS($tableNs, 'table');
388 foreach ($tables as $worksheetDataSet) {
389 /** @var \DOMElement $worksheetDataSet */
390 $worksheetName = $worksheetDataSet->getAttributeNS($tableNs, 'name');
392 // Check loadSheetsOnly
393 if (isset($this->loadSheetsOnly
)
395 && !in_array($worksheetName, $this->loadSheetsOnly
)) {
400 if ($worksheetID > 0) {
401 $spreadsheet->createSheet(); // First sheet is added by default
403 $spreadsheet->setActiveSheetIndex($worksheetID);
405 if ($worksheetName) {
406 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
407 // formula cells... during the load, all formulae should be correct, and we're simply
408 // bringing the worksheet name in line with the formula, not the reverse
409 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
412 // Go through every child of table element
414 foreach ($worksheetDataSet->childNodes
as $childNode) {
415 /** @var \DOMElement $childNode */
417 // Filter elements which are not under the "table" ns
418 if ($childNode->namespaceURI
!= $tableNs) {
422 $key = $childNode->nodeName
;
424 // Remove ns from node name
425 if (strpos($key, ':') !== false) {
426 $keyChunks = explode(':', $key);
427 $key = array_pop($keyChunks);
431 case 'table-header-rows':
432 /// TODO :: Figure this out. This is only a partial implementation I guess.
433 // ($rowData it's not used at all and I'm not sure that PHPExcel
434 // has an API for this)
436 // foreach ($rowData as $keyRowData => $cellData) {
437 // $rowData = $cellData;
442 if ($childNode->hasAttributeNS($tableNs, 'number-rows-repeated')) {
443 $rowRepeats = $childNode->getAttributeNS($tableNs, 'number-rows-repeated');
449 foreach ($childNode->childNodes
as $key => $cellData) {
450 // @var \DOMElement $cellData
452 if ($this->getReadFilter() !== null) {
453 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
460 // Initialize variables
461 $formatting = $hyperlink = null;
462 $hasCalculatedValue = false;
463 $cellDataFormula = '';
465 if ($cellData->hasAttributeNS($tableNs, 'formula')) {
466 $cellDataFormula = $cellData->getAttributeNS($tableNs, 'formula');
467 $hasCalculatedValue = true;
471 $annotation = $cellData->getElementsByTagNameNS($officeNs, 'annotation');
473 if ($annotation->length
> 0) {
474 $textNode = $annotation->item(0)->getElementsByTagNameNS($textNs, 'p');
476 if ($textNode->length
> 0) {
477 $text = $this->scanElementForText($textNode->item(0));
479 $spreadsheet->getActiveSheet()
480 ->getComment($columnID . $rowID)
481 ->setText($this->parseRichText($text));
482 // ->setAuthor( $author )
488 /** @var \DOMElement[] $paragraphs */
491 foreach ($cellData->childNodes
as $item) {
492 /** @var \DOMElement $item */
494 // Filter text:p elements
495 if ($item->nodeName
== 'text:p') {
496 $paragraphs[] = $item;
500 if (count($paragraphs) > 0) {
501 // Consolidate if there are multiple p records (maybe with spans as well)
504 // Text can have multiple text:p and within those, multiple text:span.
505 // text:p newlines, but text:span does not.
506 // Also, here we assume there is no text data is span fields are specified, since
507 // we have no way of knowing proper positioning anyway.
509 foreach ($paragraphs as $pData) {
510 $dataArray[] = $this->scanElementForText($pData);
512 $allCellDataText = implode($dataArray, "\n");
514 $type = $cellData->getAttributeNS($officeNs, 'value-type');
518 $type = DataType
::TYPE_STRING
;
519 $dataValue = $allCellDataText;
521 foreach ($paragraphs as $paragraph) {
522 $link = $paragraph->getElementsByTagNameNS($textNs, 'a');
523 if ($link->length
> 0) {
524 $hyperlink = $link->item(0)->getAttributeNS($xlinkNs, 'href');
530 $type = DataType
::TYPE_BOOL
;
531 $dataValue = ($allCellDataText == 'TRUE') ?
true : false;
535 $type = DataType
::TYPE_NUMERIC
;
536 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
538 if (floor($dataValue) == $dataValue) {
539 $dataValue = (int) $dataValue;
541 $formatting = NumberFormat
::FORMAT_PERCENTAGE_00
;
545 $type = DataType
::TYPE_NUMERIC
;
546 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
548 if (floor($dataValue) == $dataValue) {
549 $dataValue = (int) $dataValue;
551 $formatting = NumberFormat
::FORMAT_CURRENCY_USD_SIMPLE
;
555 $type = DataType
::TYPE_NUMERIC
;
556 $dataValue = (float) $cellData->getAttributeNS($officeNs, 'value');
558 if (floor($dataValue) == $dataValue) {
559 if ($dataValue == (int) $dataValue) {
560 $dataValue = (int) $dataValue;
562 $dataValue = (float) $dataValue;
568 $type = DataType
::TYPE_NUMERIC
;
569 $value = $cellData->getAttributeNS($officeNs, 'date-value');
571 $dateObj = new DateTime($value, $GMT);
572 $dateObj->setTimeZone($timezoneObj);
573 list($year, $month, $day, $hour, $minute, $second) = explode(
575 $dateObj->format('Y m d H i s')
578 $dataValue = Date
::formattedPHPToExcel(
587 if ($dataValue != floor($dataValue)) {
588 $formatting = NumberFormat
::FORMAT_DATE_XLSX15
590 . NumberFormat
::FORMAT_DATE_TIME4
;
592 $formatting = NumberFormat
::FORMAT_DATE_XLSX15
;
597 $type = DataType
::TYPE_NUMERIC
;
599 $timeValue = $cellData->getAttributeNS($officeNs, 'time-value');
601 $dataValue = Date
::PHPToExcel(
603 '01-01-1970 ' . implode(':', sscanf($timeValue, 'PT%dH%dM%dS'))
606 $formatting = NumberFormat
::FORMAT_DATE_TIME4
;
613 $type = DataType
::TYPE_NULL
;
617 if ($hasCalculatedValue) {
618 $type = DataType
::TYPE_FORMULA
;
619 $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') +
1);
620 $temp = explode('"', $cellDataFormula);
622 foreach ($temp as &$value) {
623 // Only replace in alternate array entries (i.e. non-quoted blocks)
624 if ($tKey = !$tKey) {
625 // Cell range reference in another sheet
626 $value = preg_replace('/\[([^\.]+)\.([^\.]+):\.([^\.]+)\]/U', '$1!$2:$3', $value);
628 // Cell reference in another sheet
629 $value = preg_replace('/\[([^\.]+)\.([^\.]+)\]/U', '$1!$2', $value);
631 // Cell range reference
632 $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/U', '$1:$2', $value);
634 // Simple cell reference
635 $value = preg_replace('/\[\.([^\.]+)\]/U', '$1', $value);
637 $value = Calculation
::translateSeparator(';', ',', $value, $inBraces);
642 // Then rebuild the formula string
643 $cellDataFormula = implode('"', $temp);
646 if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) {
647 $colRepeats = (int) $cellData->getAttributeNS($tableNs, 'number-columns-repeated');
652 if ($type !== null) {
653 for ($i = 0; $i < $colRepeats; ++
$i) {
658 if ($type !== DataType
::TYPE_NULL
) {
659 for ($rowAdjust = 0; $rowAdjust < $rowRepeats; ++
$rowAdjust) {
660 $rID = $rowID +
$rowAdjust;
662 $cell = $spreadsheet->getActiveSheet()
663 ->getCell($columnID . $rID);
666 if ($hasCalculatedValue) {
667 $cell->setValueExplicit($cellDataFormula, $type);
669 $cell->setValueExplicit($dataValue, $type);
672 if ($hasCalculatedValue) {
673 $cell->setCalculatedValue($dataValue);
676 // Set other properties
677 if ($formatting !== null) {
678 $spreadsheet->getActiveSheet()
679 ->getStyle($columnID . $rID)
681 ->setFormatCode($formatting);
683 $spreadsheet->getActiveSheet()
684 ->getStyle($columnID . $rID)
686 ->setFormatCode(NumberFormat
::FORMAT_GENERAL
);
689 if ($hyperlink !== null) {
690 $cell->getHyperlink()
691 ->setUrl($hyperlink);
699 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')
700 ||
$cellData->hasAttributeNS($tableNs, 'number-rows-spanned')
702 if (($type !== DataType
::TYPE_NULL
) ||
(!$this->readDataOnly
)) {
703 $columnTo = $columnID;
705 if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) {
706 $columnIndex = Coordinate
::columnIndexFromString($columnID);
707 $columnIndex +
= (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned');
710 $columnTo = Coordinate
::stringFromColumnIndex($columnIndex +
1);
715 if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) {
716 $rowTo = $rowTo +
(int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1;
719 $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo;
720 $spreadsheet->getActiveSheet()->mergeCells($cellRange);
726 $rowID +
= $rowRepeats;
740 * Recursively scan element.
742 * @param \DOMNode $element
746 protected function scanElementForText(\DOMNode
$element)
749 foreach ($element->childNodes
as $child) {
750 /** @var \DOMNode $child */
751 if ($child->nodeType
== XML_TEXT_NODE
) {
752 $str .= $child->nodeValue
;
753 } elseif ($child->nodeType
== XML_ELEMENT_NODE
&& $child->nodeName
== 'text:s') {
757 /** @var \DOMAttr $cAttr */
758 $cAttr = $child->attributes
->getNamedItem('c');
760 $multiplier = (int) $cAttr->nodeValue
;
765 $str .= str_repeat(' ', $multiplier);
768 if ($child->hasChildNodes()) {
769 $str .= $this->scanElementForText($child);
781 private function parseRichText($is)
783 $value = new RichText();
784 $value->createText($is);