use boost flat_map for faster map
[LibreOffice.git] / l10ntools / source / xmlparse.cxx
blobe7c4fad6eb9537fbd72165173b30e775c2246cd8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include <sal/config.h>
21 #include <iterator> /* std::iterator*/
23 #include <cassert>
24 #include <stdio.h>
25 #include <string_view>
27 #include <helper.hxx>
28 #include <common.hxx>
29 #include <xmlparse.hxx>
30 #include <fstream>
31 #include <iostream>
32 #include <osl/thread.hxx>
33 #include <osl/process.h>
34 #include <rtl/strbuf.hxx>
35 #include <unicode/regex.h>
37 using namespace osl;
39 #define XML_LANG "xml-lang"
44 XMLChildNode::XMLChildNode( XMLParentNode *pPar )
45 : m_pParent( pPar )
47 if ( m_pParent )
48 m_pParent->AddChild( this );
52 XMLChildNode::XMLChildNode( const XMLChildNode& rObj)
53 : XMLNode(rObj),
54 m_pParent(rObj.m_pParent)
58 XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj)
60 if(this != &rObj)
62 m_pParent=rObj.m_pParent;
64 return *this;
70 XMLParentNode::~XMLParentNode()
72 if( m_pChildList )
74 RemoveAndDeleteAllChildren();
78 XMLParentNode::XMLParentNode( const XMLParentNode& rObj)
79 : XMLChildNode( rObj )
81 if( !rObj.m_pChildList )
82 return;
84 m_pChildList.reset( new XMLChildNodeList );
85 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
87 XMLChildNode* pNode = (*rObj.m_pChildList)[ i ];
88 if( pNode != nullptr)
90 switch(pNode->GetNodeType())
92 case XMLNodeType::ELEMENT:
93 AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break;
94 case XMLNodeType::DATA:
95 AddChild( new XMLData ( *static_cast<XMLData* > (pNode) ) ); break;
96 case XMLNodeType::COMMENT:
97 AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break;
98 case XMLNodeType::DEFAULT:
99 AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break;
100 default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
106 XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj)
108 if(this!=&rObj)
110 XMLChildNode::operator=(rObj);
111 if( m_pChildList )
113 RemoveAndDeleteAllChildren();
115 if( rObj.m_pChildList )
117 m_pChildList.reset( new XMLChildNodeList );
118 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
119 AddChild( (*rObj.m_pChildList)[ i ] );
121 else
122 m_pChildList.reset();
125 return *this;
127 void XMLParentNode::AddChild( XMLChildNode *pChild )
129 if ( !m_pChildList )
130 m_pChildList.reset( new XMLChildNodeList );
131 m_pChildList->push_back( pChild );
134 void XMLParentNode::RemoveAndDeleteAllChildren()
136 if ( m_pChildList )
138 for ( size_t i = 0; i < m_pChildList->size(); i++ )
139 delete (*m_pChildList)[ i ];
140 m_pChildList->clear();
147 void XMLFile::Write( OString const &aFilename )
149 std::ofstream s(
150 aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
151 if (!s.is_open())
153 std::cerr
154 << "Error: helpex cannot create file " << aFilename
155 << '\n';
156 std::exit(EXIT_FAILURE);
158 Write(s);
159 s.close();
162 void XMLFile::Write( std::ofstream &rStream , XMLNode *pCur )
164 if ( !pCur )
165 Write( rStream, this );
166 else {
167 switch( pCur->GetNodeType())
169 case XMLNodeType::XFILE:
171 if( GetChildList())
172 for ( size_t i = 0; i < GetChildList()->size(); i++ )
173 Write( rStream, (*GetChildList())[ i ] );
175 break;
176 case XMLNodeType::ELEMENT:
178 XMLElement *pElement = static_cast<XMLElement*>(pCur);
179 rStream << "<";
180 rStream << pElement->GetName();
181 if ( pElement->GetAttributeList())
182 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
184 rStream << " ";
185 OString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
186 rStream << XMLUtil::QuotHTML( sData );
187 rStream << "=\"";
188 sData = (*pElement->GetAttributeList())[ j ]->GetValue();
189 rStream << XMLUtil::QuotHTML( sData );
190 rStream << "\"";
192 if ( !pElement->GetChildList())
193 rStream << "/>";
194 else
196 rStream << ">";
197 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
198 Write( rStream, (*pElement->GetChildList())[ k ] );
199 rStream << "</";
200 rStream << pElement->GetName();
201 rStream << ">";
204 break;
205 case XMLNodeType::DATA:
207 OString sData( static_cast<const XMLData*>(pCur)->GetData());
208 rStream << XMLUtil::QuotHTML( sData );
210 break;
211 case XMLNodeType::COMMENT:
213 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
214 rStream << "<!--";
215 rStream << pComment->GetComment();
216 rStream << "-->";
218 break;
219 case XMLNodeType::DEFAULT:
221 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
222 rStream << pDefault->GetDefault();
224 break;
229 void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
231 if ( !pCur )
232 Print( this );
233 else
235 switch( pCur->GetNodeType())
237 case XMLNodeType::XFILE:
239 if( GetChildList())
240 for ( size_t i = 0; i < GetChildList()->size(); i++ )
241 Print( (*GetChildList())[ i ] );
243 break;
244 case XMLNodeType::ELEMENT:
246 XMLElement *pElement = static_cast<XMLElement*>(pCur);
248 fprintf( stdout, "<%s", pElement->GetName().getStr());
249 if ( pElement->GetAttributeList())
251 for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j)
253 const OString aAttrName((*pElement->GetAttributeList())[j]->GetName());
254 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
256 fprintf( stdout, " %s=\"%s\"",
257 aAttrName.getStr(),
258 (*pElement->GetAttributeList())[ j ]->GetValue().getStr());
262 if ( !pElement->GetChildList())
263 fprintf( stdout, "/>" );
264 else
266 fprintf( stdout, ">" );
267 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
268 Print( (*pElement->GetChildList())[ k ], nLevel + 1 );
269 fprintf( stdout, "</%s>", pElement->GetName().getStr());
272 break;
273 case XMLNodeType::DATA:
275 const XMLData *pData = static_cast<const XMLData*>(pCur);
276 fprintf( stdout, "%s", pData->GetData().getStr());
278 break;
279 case XMLNodeType::COMMENT:
281 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
282 fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr());
284 break;
285 case XMLNodeType::DEFAULT:
287 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
288 fprintf( stdout, "%s", pDefault->GetDefault().getStr());
290 break;
294 XMLFile::~XMLFile()
296 if( m_pXMLStrings )
298 for (auto const& pos : *m_pXMLStrings)
300 delete pos.second; // Check and delete content also ?
305 XMLFile::XMLFile( const OString &rFileName ) // the file name, empty if created from memory stream
306 : XMLParentNode( nullptr )
307 , m_sFileName( rFileName )
309 m_aNodes_localize.emplace( OString("bookmark") , true );
310 m_aNodes_localize.emplace( OString("variable") , true );
311 m_aNodes_localize.emplace( OString("paragraph") , true );
312 m_aNodes_localize.emplace( OString("h1") , true );
313 m_aNodes_localize.emplace( OString("h2") , true );
314 m_aNodes_localize.emplace( OString("h3") , true );
315 m_aNodes_localize.emplace( OString("h4") , true );
316 m_aNodes_localize.emplace( OString("h5") , true );
317 m_aNodes_localize.emplace( OString("h6") , true );
318 m_aNodes_localize.emplace( OString("note") , true );
319 m_aNodes_localize.emplace( OString("tip") , true );
320 m_aNodes_localize.emplace( OString("warning") , true );
321 m_aNodes_localize.emplace( OString("alt") , true );
322 m_aNodes_localize.emplace( OString("caption") , true );
323 m_aNodes_localize.emplace( OString("title") , true );
324 m_aNodes_localize.emplace( OString("link") , true );
327 void XMLFile::Extract()
329 m_pXMLStrings.reset( new XMLHashMap );
330 SearchL10NElements( this );
333 void XMLFile::InsertL10NElement( XMLElement* pElement )
335 OString sId, sLanguage("en-US");
336 LangHashMap* pElem;
338 if( pElement->GetAttributeList() != nullptr )
340 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
342 const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName());
343 // Get the "id" Attribute
344 if (sTempStr == "id")
346 sId = (*pElement->GetAttributeList())[ j ]->GetValue();
348 // Get the "xml-lang" Attribute
349 if (sTempStr == XML_LANG)
351 sLanguage = (*pElement->GetAttributeList())[j]->GetValue();
356 else
358 fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
359 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
360 Print( pElement );
361 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
364 XMLHashMap::iterator pos = m_pXMLStrings->find( sId );
365 if( pos == m_pXMLStrings->end() ) // No instance, create new one
367 pElem = new LangHashMap;
368 (*pElem)[ sLanguage ]=pElement;
369 m_pXMLStrings->emplace( sId , pElem );
370 m_vOrder.push_back( sId );
372 else // Already there
374 pElem=pos->second;
375 if ( pElem->count(sLanguage) )
377 fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() );
378 exit( -1 );
380 (*pElem)[ sLanguage ]=pElement;
384 XMLFile::XMLFile( const XMLFile& rObj )
385 : XMLParentNode( rObj )
386 , m_sFileName( rObj.m_sFileName )
388 if( this != &rObj )
390 m_aNodes_localize = rObj.m_aNodes_localize;
391 m_vOrder = rObj.m_vOrder;
395 XMLFile& XMLFile::operator=(const XMLFile& rObj)
397 if( this != &rObj )
399 XMLParentNode::operator=(rObj);
401 m_aNodes_localize = rObj.m_aNodes_localize;
402 m_vOrder = rObj.m_vOrder;
404 m_pXMLStrings.reset();
406 if( rObj.m_pXMLStrings )
408 m_pXMLStrings.reset( new XMLHashMap );
409 for (auto const& pos : *rObj.m_pXMLStrings)
411 LangHashMap* pElem=pos.second;
412 LangHashMap* pNewelem = new LangHashMap;
413 for (auto const& pos2 : *pElem)
415 (*pNewelem)[ pos2.first ] = new XMLElement( *pos2.second );
417 (*m_pXMLStrings)[ pos.first ] = pNewelem;
421 return *this;
424 void XMLFile::SearchL10NElements( XMLChildNode *pCur )
426 if ( !pCur )
427 SearchL10NElements( this );
428 else
430 switch( pCur->GetNodeType())
432 case XMLNodeType::XFILE:
434 if( GetChildList())
436 for ( size_t i = 0; i < GetChildList()->size(); i++ )
438 XMLChildNode* pElement = (*GetChildList())[ i ];
439 if( pElement->GetNodeType() == XMLNodeType::ELEMENT )
440 SearchL10NElements( pElement );
444 break;
445 case XMLNodeType::ELEMENT:
447 bool bInsert = true;
448 XMLElement *pElement = static_cast<XMLElement*>(pCur);
449 const OString sName(pElement->GetName().toAsciiLowerCase());
450 if ( pElement->GetAttributeList())
452 for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j )
454 if ((*pElement->GetAttributeList())[j]->GetName() == "localize")
456 bInsert=false;
457 break;
462 if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) )
463 InsertL10NElement(pElement);
464 else if ( bInsert && pElement->GetChildList() )
466 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
467 SearchL10NElements( (*pElement->GetChildList())[ k ] );
470 break;
471 default:
472 break;
477 bool XMLFile::CheckExportStatus( XMLParentNode *pCur )
479 static bool bStatusExport = true;
481 if ( !pCur )
482 CheckExportStatus( this );
483 else {
484 switch( pCur->GetNodeType())
486 case XMLNodeType::XFILE:
488 if( GetChildList())
490 for ( size_t i = 0; i < GetChildList()->size(); i++ )
492 XMLParentNode* pElement = static_cast<XMLParentNode*>((*GetChildList())[ i ]);
493 if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) CheckExportStatus( pElement );//, i);
497 break;
498 case XMLNodeType::ELEMENT:
500 XMLElement *pElement = static_cast<XMLElement*>(pCur);
501 if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC"))
503 if ( pElement->GetAttributeList())
505 for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt; ++j)
507 const OString tmpStr((*pElement->GetAttributeList())[j]->GetName());
508 if (tmpStr.equalsIgnoreAsciiCase("STATUS"))
510 const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue());
511 if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") &&
512 !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED"))
514 bStatusExport = false;
521 else if ( pElement->GetChildList() )
523 for (size_t k = 0; k < pElement->GetChildList()->size(); ++k)
524 CheckExportStatus( static_cast<XMLParentNode*>((*pElement->GetChildList())[k]) );
527 break;
528 default:
529 break;
532 return bStatusExport;
535 XMLElement::XMLElement(
536 const OString &rName, // the element name
537 XMLParentNode *pParent // parent node of this element
539 : XMLParentNode( pParent )
540 , m_sElementName( rName )
544 XMLElement::XMLElement(const XMLElement& rObj)
545 : XMLParentNode( rObj )
546 , m_sElementName( rObj.m_sElementName )
548 if ( rObj.m_pAttributes )
550 m_pAttributes.reset( new XMLAttributeList );
551 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
552 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
556 XMLElement& XMLElement::operator=(const XMLElement& rObj)
558 if( this !=& rObj )
560 XMLParentNode::operator=(rObj);
561 m_sElementName = rObj.m_sElementName;
563 if ( m_pAttributes )
565 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
566 delete (*m_pAttributes)[ i ];
567 m_pAttributes.reset();
569 if ( rObj.m_pAttributes )
571 m_pAttributes.reset( new XMLAttributeList );
572 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
573 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
576 return *this;
579 void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue )
581 if ( !m_pAttributes )
582 m_pAttributes.reset( new XMLAttributeList );
583 m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
586 void XMLElement::ChangeLanguageTag( const OString &rValue )
588 if ( m_pAttributes )
590 bool bWasSet = false;
591 for (size_t i = 0; i < m_pAttributes->size(); ++i)
593 if ((*m_pAttributes)[ i ]->GetName() == XML_LANG)
595 (*m_pAttributes)[ i ]->setValue(rValue);
596 bWasSet = true;
600 if (!bWasSet)
601 AddAttribute(XML_LANG, rValue);
603 XMLChildNodeList* pCList = GetChildList();
605 if( !pCList )
606 return;
608 for ( size_t i = 0; i < pCList->size(); i++ )
610 XMLChildNode* pNode = (*pCList)[ i ];
611 if( pNode && pNode->GetNodeType() == XMLNodeType::ELEMENT )
613 XMLElement* pElem = static_cast< XMLElement* >(pNode);
614 pElem->ChangeLanguageTag( rValue );
615 pElem = nullptr;
616 pNode = nullptr;
619 pCList = nullptr;
622 XMLElement::~XMLElement()
624 if ( m_pAttributes )
626 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
627 delete (*m_pAttributes)[ i ];
631 OString XMLElement::ToOString()
633 OStringBuffer sBuffer;
634 Print(this,sBuffer,true);
635 return sBuffer.makeStringAndClear();
638 void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const
640 if( pCur )
642 if( bRootelement )
644 XMLElement *pElement = static_cast<XMLElement*>(pCur);
645 if ( pElement->GetAttributeList())
647 if ( pElement->GetChildList())
649 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
651 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
652 Print( pTmp, rBuffer , false);
657 else
659 switch( pCur->GetNodeType())
661 case XMLNodeType::ELEMENT:
663 XMLElement *pElement = static_cast<XMLElement*>(pCur);
665 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") )
667 rBuffer.append( "<" );
668 rBuffer.append( pElement->GetName() );
669 if ( pElement->GetAttributeList())
671 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
673 const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() );
674 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
676 rBuffer.append(
677 " " + aAttrName + "=\"" +
678 (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" );
682 if ( !pElement->GetChildList())
683 rBuffer.append( "/>" );
684 else
686 rBuffer.append( ">" );
687 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
689 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
690 Print( pTmp, rBuffer , false);
692 rBuffer.append( "</" + pElement->GetName() + ">" );
696 break;
697 case XMLNodeType::DATA:
699 const XMLData *pData = static_cast<const XMLData*>(pCur);
700 rBuffer.append( pData->GetData() );
702 break;
703 case XMLNodeType::COMMENT:
705 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
706 rBuffer.append( "<!--" + pComment->GetComment() + "-->" );
708 break;
709 case XMLNodeType::DEFAULT:
711 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
712 rBuffer.append( pDefault->GetDefault() );
714 break;
715 default:
716 break;
720 else
722 fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
723 return;
730 namespace
733 OUString lcl_pathnameToAbsoluteUrl(std::string_view rPathname)
735 OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 );
736 OUString sUrl;
737 if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl)
738 != osl::FileBase::E_None)
740 std::cerr << "Error: Cannot convert input pathname to URL\n";
741 std::exit(EXIT_FAILURE);
743 OUString sCwd;
744 if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None)
746 std::cerr << "Error: Cannot determine cwd\n";
747 std::exit(EXIT_FAILURE);
749 if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl)
750 != osl::FileBase::E_None)
752 std::cerr << "Error: Cannot convert input URL to absolute URL\n";
753 std::exit(EXIT_FAILURE);
755 return sUrl;
760 SimpleXMLParser::SimpleXMLParser()
761 : m_pCurNode(nullptr)
762 , m_pCurData(nullptr)
764 m_aParser = XML_ParserCreate( nullptr );
765 XML_SetUserData( m_aParser, this );
766 XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler );
767 XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler );
768 XML_SetCommentHandler( m_aParser, CommentHandler );
769 XML_SetDefaultHandler( m_aParser, DefaultHandler );
772 SimpleXMLParser::~SimpleXMLParser()
774 XML_ParserFree( m_aParser );
777 void SimpleXMLParser::StartElementHandler(
778 void *userData, const XML_Char *name, const XML_Char **atts )
780 static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts );
783 void SimpleXMLParser::EndElementHandler(
784 void *userData, const XML_Char * /*name*/ )
786 static_cast<SimpleXMLParser *>(userData)->EndElement();
789 void SimpleXMLParser::CharacterDataHandler(
790 void *userData, const XML_Char *s, int len )
792 static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len );
795 void SimpleXMLParser::CommentHandler(
796 void *userData, const XML_Char *data )
798 static_cast<SimpleXMLParser *>(userData)->Comment( data );
801 void SimpleXMLParser::DefaultHandler(
802 void *userData, const XML_Char *s, int len )
804 static_cast<SimpleXMLParser *>(userData)->Default( s, len );
807 void SimpleXMLParser::StartElement(
808 const XML_Char *name, const XML_Char **atts )
810 XMLElement *pElement = new XMLElement( OString(name), m_pCurNode );
811 m_pCurNode = pElement;
812 m_pCurData = nullptr;
814 int i = 0;
815 while( atts[i] )
817 pElement->AddAttribute( atts[ i ], atts[ i + 1 ] );
818 i += 2;
822 void SimpleXMLParser::EndElement()
824 m_pCurNode = m_pCurNode->GetParent();
825 m_pCurData = nullptr;
828 void SimpleXMLParser::CharacterData( const XML_Char *s, int len )
830 if ( !m_pCurData )
832 OString x( s, len );
833 m_pCurData = new XMLData( helper::UnQuotHTML(x) , m_pCurNode );
835 else
837 OString x( s, len );
838 m_pCurData->AddData( helper::UnQuotHTML(x) );
843 void SimpleXMLParser::Comment( const XML_Char *data )
845 m_pCurData = nullptr;
846 new XMLComment( OString( data ), m_pCurNode );
849 void SimpleXMLParser::Default( const XML_Char *s, int len )
851 m_pCurData = nullptr;
852 new XMLDefault(OString( s, len ), m_pCurNode );
855 bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile )
857 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
858 m_aErrorInformation.m_nLine = 0;
859 m_aErrorInformation.m_nColumn = 0;
860 m_aErrorInformation.m_sMessage = "ERROR: Unable to open file ";
861 m_aErrorInformation.m_sMessage += rFileName;
863 OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
865 oslFileHandle h;
866 if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
867 != osl_File_E_None)
869 return false;
872 sal_uInt64 s;
873 oslFileError e = osl_getFileSize(h, &s);
874 void * p = nullptr;
875 if (e == osl_File_E_None)
877 e = osl_mapFile(h, &p, s, 0, 0);
879 if (e != osl_File_E_None)
881 osl_closeFile(h);
882 return false;
885 pXMLFile->SetName( rFileName );
887 m_pCurNode = pXMLFile;
888 m_pCurData = nullptr;
890 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
891 m_aErrorInformation.m_nLine = 0;
892 m_aErrorInformation.m_nColumn = 0;
893 if ( !pXMLFile->GetName().isEmpty())
895 m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully";
897 else
898 m_aErrorInformation.m_sMessage = "XML-File parsed successfully";
900 bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true);
901 if (!result)
903 m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser );
904 m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser );
905 m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser );
907 m_aErrorInformation.m_sMessage = "ERROR: ";
908 if ( !pXMLFile->GetName().isEmpty())
909 m_aErrorInformation.m_sMessage += pXMLFile->GetName();
910 else
911 m_aErrorInformation.m_sMessage += "XML-File (";
913 m_aErrorInformation.m_sMessage +=
914 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," +
915 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): ";
917 switch (m_aErrorInformation.m_eCode)
919 case XML_ERROR_NO_MEMORY:
920 m_aErrorInformation.m_sMessage += "No memory";
921 break;
922 case XML_ERROR_SYNTAX:
923 m_aErrorInformation.m_sMessage += "Syntax";
924 break;
925 case XML_ERROR_NO_ELEMENTS:
926 m_aErrorInformation.m_sMessage += "No elements";
927 break;
928 case XML_ERROR_INVALID_TOKEN:
929 m_aErrorInformation.m_sMessage += "Invalid token";
930 break;
931 case XML_ERROR_UNCLOSED_TOKEN:
932 m_aErrorInformation.m_sMessage += "Unclosed token";
933 break;
934 case XML_ERROR_PARTIAL_CHAR:
935 m_aErrorInformation.m_sMessage += "Partial char";
936 break;
937 case XML_ERROR_TAG_MISMATCH:
938 m_aErrorInformation.m_sMessage += "Tag mismatch";
939 break;
940 case XML_ERROR_DUPLICATE_ATTRIBUTE:
941 m_aErrorInformation.m_sMessage += "Duplicated attribute";
942 break;
943 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
944 m_aErrorInformation.m_sMessage += "Junk after doc element";
945 break;
946 case XML_ERROR_PARAM_ENTITY_REF:
947 m_aErrorInformation.m_sMessage += "Param entity ref";
948 break;
949 case XML_ERROR_UNDEFINED_ENTITY:
950 m_aErrorInformation.m_sMessage += "Undefined entity";
951 break;
952 case XML_ERROR_RECURSIVE_ENTITY_REF:
953 m_aErrorInformation.m_sMessage += "Recursive entity ref";
954 break;
955 case XML_ERROR_ASYNC_ENTITY:
956 m_aErrorInformation.m_sMessage += "Async_entity";
957 break;
958 case XML_ERROR_BAD_CHAR_REF:
959 m_aErrorInformation.m_sMessage += "Bad char ref";
960 break;
961 case XML_ERROR_BINARY_ENTITY_REF:
962 m_aErrorInformation.m_sMessage += "Binary entity";
963 break;
964 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
965 m_aErrorInformation.m_sMessage += "Attribute external entity ref";
966 break;
967 case XML_ERROR_MISPLACED_XML_PI:
968 m_aErrorInformation.m_sMessage += "Misplaced xml pi";
969 break;
970 case XML_ERROR_UNKNOWN_ENCODING:
971 m_aErrorInformation.m_sMessage += "Unknown encoding";
972 break;
973 case XML_ERROR_INCORRECT_ENCODING:
974 m_aErrorInformation.m_sMessage += "Incorrect encoding";
975 break;
976 case XML_ERROR_UNCLOSED_CDATA_SECTION:
977 m_aErrorInformation.m_sMessage += "Unclosed cdata section";
978 break;
979 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
980 m_aErrorInformation.m_sMessage += "External entity handling";
981 break;
982 case XML_ERROR_NOT_STANDALONE:
983 m_aErrorInformation.m_sMessage += "Not standalone";
984 break;
985 case XML_ERROR_NONE:
986 break;
987 default:
988 break;
992 osl_unmapMappedFile(h, p, s);
993 osl_closeFile(h);
995 return result;
998 namespace
1001 icu::UnicodeString lcl_QuotRange(
1002 const icu::UnicodeString& rString, const sal_Int32 nStart,
1003 const sal_Int32 nEnd, bool bInsideTag = false )
1005 icu::UnicodeString sReturn;
1006 assert( nStart < nEnd );
1007 assert( nStart >= 0 );
1008 assert( nEnd <= rString.length() );
1009 for (sal_Int32 i = nStart; i < nEnd; ++i)
1011 switch (rString[i])
1013 case '<':
1014 sReturn.append("&lt;");
1015 break;
1016 case '>':
1017 sReturn.append("&gt;");
1018 break;
1019 case '"':
1020 if( !bInsideTag )
1021 sReturn.append("&quot;");
1022 else
1023 sReturn.append(rString[i]);
1024 break;
1025 case '&':
1026 if (rString.startsWith("&amp;", i, 5))
1027 sReturn.append('&');
1028 else
1029 sReturn.append("&amp;");
1030 break;
1031 default:
1032 sReturn.append(rString[i]);
1033 break;
1036 return sReturn;
1039 bool lcl_isTag( const icu::UnicodeString& rString )
1041 static const int nSize = 20;
1042 static const icu::UnicodeString vTags[nSize] = {
1043 "ahelp", "link", "item", "emph", "defaultinline",
1044 "switchinline", "caseinline", "variable",
1045 "bookmark_value", "image", "object",
1046 "embedvar", "alt", "sup", "sub",
1047 "menuitem", "keycode", "input", "literal", "widget"
1050 for( int nIndex = 0; nIndex < nSize; ++nIndex )
1052 if( rString.startsWith("<" + vTags[nIndex]) ||
1053 rString == "</" + vTags[nIndex] + ">" )
1054 return true;
1057 return rString == "<br/>" || rString =="<help-id-missing/>";
1060 } /// anonymous namespace
1062 OString XMLUtil::QuotHTML( const OString &rString )
1064 if( rString.trim().isEmpty() )
1065 return rString;
1066 UErrorCode nIcuErr = U_ZERO_ERROR;
1067 static const sal_uInt32 nSearchFlags =
1068 UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
1069 static const icu::UnicodeString sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" );
1071 const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8);
1072 icu::UnicodeString sSource(
1073 reinterpret_cast<const UChar*>(
1074 sOUSource.getStr()), sOUSource.getLength() );
1076 icu::RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
1077 aRegexMatcher.reset( sSource );
1079 icu::UnicodeString sReturn;
1080 int32_t nEndPos = 0;
1081 int32_t nStartPos = 0;
1082 while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) )
1084 nStartPos = aRegexMatcher.start(nIcuErr);
1085 if ( nEndPos < nStartPos )
1086 sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos));
1087 nEndPos = aRegexMatcher.end(nIcuErr);
1088 icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
1089 if( lcl_isTag(sMatch) )
1091 sReturn.append("<");
1092 sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true));
1093 sReturn.append(">");
1095 else
1096 sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos));
1097 nStartPos = nEndPos;
1099 if( nEndPos < sSource.length() )
1100 sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()));
1101 sReturn.append('\0');
1102 return
1103 OUStringToOString(
1104 reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()),
1105 RTL_TEXTENCODING_UTF8);
1108 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */