crashtesting: crash seen on exporting forum-it-5909.ods to xlsx
[LibreOffice.git] / l10ntools / source / po.cxx
blobdfe013cb9b7c774e2804310113c39d4847082249
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <rtl/ustring.hxx>
11 #include <rtl/crc.h>
12 #include <sal/log.hxx>
14 #include <cstring>
15 #include <ctime>
16 #include <cassert>
18 #include <vector>
19 #include <string>
20 #include <string_view>
22 #include <po.hxx>
23 #include <helper.hxx>
25 /** Container of po entry
27 Provide all file operations related to LibreOffice specific
28 po entry and store it's attributes.
30 class GenPoEntry
32 private:
33 OStringBuffer m_sExtractCom;
34 std::vector<OString> m_sReferences;
35 OString m_sMsgCtxt;
36 OString m_sMsgId;
37 OString m_sMsgIdPlural;
38 OString m_sMsgStr;
39 std::vector<OString> m_sMsgStrPlural;
40 bool m_bFuzzy;
41 bool m_bCFormat;
42 bool m_bNull;
44 public:
45 GenPoEntry();
47 const std::vector<OString>& getReference() const { return m_sReferences; }
48 const OString& getMsgCtxt() const { return m_sMsgCtxt; }
49 const OString& getMsgId() const { return m_sMsgId; }
50 const OString& getMsgStr() const { return m_sMsgStr; }
51 bool isFuzzy() const { return m_bFuzzy; }
52 bool isNull() const { return m_bNull; }
54 void setExtractCom(std::string_view rExtractCom)
56 m_sExtractCom = rExtractCom;
58 void setReference(const OString& rReference)
60 m_sReferences.push_back(rReference);
62 void setMsgCtxt(const OString& rMsgCtxt)
64 m_sMsgCtxt = rMsgCtxt;
66 void setMsgId(const OString& rMsgId)
68 m_sMsgId = rMsgId;
70 void setMsgStr(const OString& rMsgStr)
72 m_sMsgStr = rMsgStr;
75 void writeToFile(std::ofstream& rOFStream) const;
76 void readFromFile(std::ifstream& rIFStream);
79 namespace
81 // Convert a normal string to msg/po output string
82 OString lcl_GenMsgString(const OString& rString)
84 if ( rString.isEmpty() )
85 return "\"\"";
87 OString sResult =
88 "\"" +
89 helper::escapeAll(rString,"\n""\t""\r""\\""\"","\\n""\\t""\\r""\\\\""\\\"") +
90 "\"";
91 sal_Int32 nIndex = 0;
92 while((nIndex=sResult.indexOf("\\n",nIndex))!=-1)
94 if( !sResult.match("\\\\n", nIndex-1) &&
95 nIndex!=sResult.getLength()-3)
97 sResult = sResult.replaceAt(nIndex,2,"\\n\"\n\"");
99 ++nIndex;
102 if ( sResult.indexOf('\n') != -1 )
103 return "\"\"\n" + sResult;
105 return sResult;
108 // Convert msg string to normal form
109 OString lcl_GenNormString(const OString& rString)
111 return
112 helper::unEscapeAll(
113 rString.copy(1,rString.getLength()-2),
114 "\\n""\\t""\\r""\\\\""\\\"",
115 "\n""\t""\r""\\""\"");
119 GenPoEntry::GenPoEntry()
120 : m_bFuzzy( false )
121 , m_bCFormat( false )
122 , m_bNull( false )
126 void GenPoEntry::writeToFile(std::ofstream& rOFStream) const
128 if ( rOFStream.tellp() != std::ofstream::pos_type( 0 ))
129 rOFStream << std::endl;
130 if ( !m_sExtractCom.isEmpty() )
131 rOFStream
132 << "#. "
133 << m_sExtractCom.toString().replaceAll("\n","\n#. ") << std::endl;
134 for(const auto& rReference : m_sReferences)
135 rOFStream << "#: " << rReference << std::endl;
136 if ( m_bFuzzy )
137 rOFStream << "#, fuzzy" << std::endl;
138 if ( m_bCFormat )
139 rOFStream << "#, c-format" << std::endl;
140 if ( !m_sMsgCtxt.isEmpty() )
141 rOFStream << "msgctxt "
142 << lcl_GenMsgString(m_sMsgCtxt)
143 << std::endl;
144 rOFStream << "msgid "
145 << lcl_GenMsgString(m_sMsgId) << std::endl;
146 if ( !m_sMsgIdPlural.isEmpty() )
147 rOFStream << "msgid_plural "
148 << lcl_GenMsgString(m_sMsgIdPlural)
149 << std::endl;
150 if ( !m_sMsgStrPlural.empty() )
151 for(auto & line : m_sMsgStrPlural)
152 rOFStream << line.copy(0,10) << lcl_GenMsgString(line.copy(10)) << std::endl;
153 else
154 rOFStream << "msgstr "
155 << lcl_GenMsgString(m_sMsgStr) << std::endl;
158 void GenPoEntry::readFromFile(std::ifstream& rIFStream)
160 *this = GenPoEntry();
161 OString* pLastMsg = nullptr;
162 std::string sTemp;
163 getline(rIFStream,sTemp);
164 if( rIFStream.eof() || sTemp.empty() )
166 m_bNull = true;
167 return;
169 while(!rIFStream.eof())
171 OString sLine(sTemp.data(),sTemp.length());
172 if (sLine.startsWith("#. "))
174 if( !m_sExtractCom.isEmpty() )
176 m_sExtractCom.append("\n");
178 m_sExtractCom.append(sLine.subView(3));
180 else if (sLine.startsWith("#: "))
182 m_sReferences.push_back(sLine.copy(3));
184 else if (sLine.startsWith("#, fuzzy"))
186 m_bFuzzy = true;
188 else if (sLine.startsWith("#, c-format"))
190 m_bCFormat = true;
192 else if (sLine.startsWith("msgctxt "))
194 m_sMsgCtxt = lcl_GenNormString(sLine.copy(8));
195 pLastMsg = &m_sMsgCtxt;
197 else if (sLine.startsWith("msgid "))
199 m_sMsgId = lcl_GenNormString(sLine.copy(6));
200 pLastMsg = &m_sMsgId;
202 else if (sLine.startsWith("msgid_plural "))
204 m_sMsgIdPlural = lcl_GenNormString(sLine.copy(13));
205 pLastMsg = &m_sMsgIdPlural;
207 else if (sLine.startsWith("msgstr "))
209 m_sMsgStr = lcl_GenNormString(sLine.copy(7));
210 pLastMsg = &m_sMsgStr;
212 else if (sLine.startsWith("msgstr["))
214 // assume there are no more than 10 plural forms...
215 // and that plural strings are never split to multi-line in po
216 m_sMsgStrPlural.push_back(sLine.subView(0,10) + lcl_GenNormString(sLine.copy(10)));
218 else if (sLine.startsWith("\"") && pLastMsg)
220 OString sReference;
221 if (!m_sReferences.empty())
223 sReference = m_sReferences.front();
225 if (pLastMsg != &m_sMsgCtxt || sLine != OStringConcatenation("\"" + sReference + "\\n\""))
227 *pLastMsg += lcl_GenNormString(sLine);
230 else
231 break;
232 getline(rIFStream,sTemp);
236 PoEntry::PoEntry()
237 : m_bIsInitialized( false )
241 PoEntry::PoEntry(
242 const OString& rSourceFile, std::string_view rResType, std::string_view rGroupId,
243 std::string_view rLocalId, const OString& rHelpText,
244 const OString& rText, const TYPE eType )
245 : m_bIsInitialized( false )
247 if( rSourceFile.isEmpty() )
248 throw NOSOURCFILE;
249 else if ( rResType.empty() )
250 throw NORESTYPE;
251 else if ( rGroupId.empty() )
252 throw NOGROUPID;
253 else if ( rText.isEmpty() )
254 throw NOSTRING;
255 else if ( rHelpText.getLength() == 5 )
256 throw WRONGHELPTEXT;
258 m_pGenPo.reset( new GenPoEntry() );
259 OString sReference = rSourceFile.copy(rSourceFile.lastIndexOf('/')+1);
260 m_pGenPo->setReference(sReference);
262 OString sMsgCtxt =
263 sReference + "\n" +
264 rGroupId + "\n" +
265 (rLocalId.empty() ? OString() : OString::Concat(rLocalId) + "\n") +
266 rResType;
267 switch(eType){
268 case TTEXT:
269 sMsgCtxt += ".text"; break;
270 case TQUICKHELPTEXT:
271 sMsgCtxt += ".quickhelptext"; break;
272 case TTITLE:
273 sMsgCtxt += ".title"; break;
274 // Default case is unneeded because the type of eType has only three element
276 m_pGenPo->setMsgCtxt(sMsgCtxt);
277 m_pGenPo->setMsgId(rText);
278 m_pGenPo->setExtractCom(OStringConcatenation(
279 ( !rHelpText.isEmpty() ? rHelpText + "\n" : OString()) +
280 genKeyId( m_pGenPo->getReference().front() + rGroupId + rLocalId + rResType + rText ) ));
281 m_bIsInitialized = true;
284 PoEntry::~PoEntry()
288 PoEntry::PoEntry( const PoEntry& rPo )
289 : m_pGenPo( rPo.m_pGenPo ? new GenPoEntry( *(rPo.m_pGenPo) ) : nullptr )
290 , m_bIsInitialized( rPo.m_bIsInitialized )
294 PoEntry& PoEntry::operator=(const PoEntry& rPo)
296 if( this == &rPo )
298 return *this;
300 if( rPo.m_pGenPo )
302 if( m_pGenPo )
304 *m_pGenPo = *(rPo.m_pGenPo);
306 else
308 m_pGenPo.reset( new GenPoEntry( *(rPo.m_pGenPo) ) );
311 else
313 m_pGenPo.reset();
315 m_bIsInitialized = rPo.m_bIsInitialized;
316 return *this;
319 PoEntry& PoEntry::operator=(PoEntry&& rPo) noexcept
321 m_pGenPo = std::move(rPo.m_pGenPo);
322 m_bIsInitialized = std::move(rPo.m_bIsInitialized);
323 return *this;
326 OString const & PoEntry::getSourceFile() const
328 assert( m_bIsInitialized );
329 return m_pGenPo->getReference().front();
332 OString PoEntry::getGroupId() const
334 assert( m_bIsInitialized );
335 return m_pGenPo->getMsgCtxt().getToken(0,'\n');
338 OString PoEntry::getLocalId() const
340 assert( m_bIsInitialized );
341 const OString sMsgCtxt = m_pGenPo->getMsgCtxt();
342 if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n'))
343 return OString();
344 else
345 return sMsgCtxt.getToken(1,'\n');
348 OString PoEntry::getResourceType() const
350 assert( m_bIsInitialized );
351 const OString sMsgCtxt = m_pGenPo->getMsgCtxt();
352 if (sMsgCtxt.indexOf('\n')==sMsgCtxt.lastIndexOf('\n'))
353 return sMsgCtxt.getToken(1,'\n').getToken(0,'.');
354 else
355 return sMsgCtxt.getToken(2,'\n').getToken(0,'.');
358 PoEntry::TYPE PoEntry::getType() const
360 assert( m_bIsInitialized );
361 const OString sMsgCtxt = m_pGenPo->getMsgCtxt();
362 const OString sType = sMsgCtxt.copy( sMsgCtxt.lastIndexOf('.') + 1 );
363 assert(
364 (sType == "text" || sType == "quickhelptext" || sType == "title") );
365 if ( sType == "text" )
366 return TTEXT;
367 else if ( sType == "quickhelptext" )
368 return TQUICKHELPTEXT;
369 else
370 return TTITLE;
373 bool PoEntry::isFuzzy() const
375 assert( m_bIsInitialized );
376 return m_pGenPo->isFuzzy();
379 // Get message context
380 const OString& PoEntry::getMsgCtxt() const
382 assert( m_bIsInitialized );
383 return m_pGenPo->getMsgCtxt();
387 // Get translation string in merge format
388 OString const & PoEntry::getMsgId() const
390 assert( m_bIsInitialized );
391 return m_pGenPo->getMsgId();
394 // Get translated string in merge format
395 const OString& PoEntry::getMsgStr() const
397 assert( m_bIsInitialized );
398 return m_pGenPo->getMsgStr();
402 bool PoEntry::IsInSameComp(const PoEntry& rPo1,const PoEntry& rPo2)
404 assert( rPo1.m_bIsInitialized && rPo2.m_bIsInitialized );
405 return ( rPo1.getSourceFile() == rPo2.getSourceFile() &&
406 rPo1.getGroupId() == rPo2.getGroupId() &&
407 rPo1.getLocalId() == rPo2.getLocalId() &&
408 rPo1.getResourceType() == rPo2.getResourceType() );
411 OString PoEntry::genKeyId(const OString& rGenerator)
413 sal_uInt32 nCRC = rtl_crc32(0, rGenerator.getStr(), rGenerator.getLength());
414 // Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs
415 static const char sSymbols[] =
416 "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789";
417 char sKeyId[6];
418 for( short nKeyInd = 0; nKeyInd < 5; ++nKeyInd )
420 sKeyId[nKeyInd] = sSymbols[(nCRC & 63) % strlen(sSymbols)];
421 nCRC >>= 6;
423 sKeyId[5] = '\0';
424 return OString(sKeyId);
427 namespace
429 // Get actual time in "YEAR-MO-DA HO:MI+ZONE" form
430 OString lcl_GetTime()
432 time_t aNow = time(nullptr);
433 struct tm* pNow = localtime(&aNow);
434 char pBuff[50];
435 strftime( pBuff, sizeof pBuff, "%Y-%m-%d %H:%M%z", pNow );
436 return OString(pBuff);
440 // when updating existing files (pocheck), reuse provided po-header
441 PoHeader::PoHeader( std::string_view rExtSrc, const OString& rPoHeaderMsgStr )
442 : m_pGenPo( new GenPoEntry() )
443 , m_bIsInitialized( false )
445 m_pGenPo->setExtractCom(OStringConcatenation(OString::Concat("extracted from ") + rExtSrc));
446 m_pGenPo->setMsgStr(rPoHeaderMsgStr);
447 m_bIsInitialized = true;
450 PoHeader::PoHeader( std::string_view rExtSrc )
451 : m_pGenPo( new GenPoEntry() )
452 , m_bIsInitialized( false )
454 m_pGenPo->setExtractCom(OStringConcatenation(OString::Concat("extracted from ") + rExtSrc));
455 m_pGenPo->setMsgStr(
456 "Project-Id-Version: PACKAGE VERSION\n"
457 "Report-Msgid-Bugs-To: https://bugs.libreoffice.org/enter_bug.cgi?"
458 "product=LibreOffice&bug_status=UNCONFIRMED&component=UI\n"
459 "POT-Creation-Date: " + lcl_GetTime() +
460 "\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
461 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
462 "Language-Team: LANGUAGE <LL@li.org>\n"
463 "MIME-Version: 1.0\n"
464 "Content-Type: text/plain; charset=UTF-8\n"
465 "Content-Transfer-Encoding: 8bit\n"
466 "X-Accelerator-Marker: ~\n"
467 "X-Generator: LibreOffice\n");
468 m_bIsInitialized = true;
471 PoHeader::~PoHeader()
475 PoOfstream::PoOfstream()
476 : m_bIsAfterHeader( false )
480 PoOfstream::PoOfstream(const OString& rFileName, OpenMode aMode )
481 : m_bIsAfterHeader( false )
483 open( rFileName, aMode );
486 PoOfstream::~PoOfstream()
488 if( isOpen() )
490 close();
494 void PoOfstream::open(const OString& rFileName, OpenMode aMode )
496 assert( !isOpen() );
497 if( aMode == TRUNC )
499 m_aOutPut.open( rFileName.getStr(),
500 std::ios_base::out | std::ios_base::trunc );
501 m_bIsAfterHeader = false;
503 else if( aMode == APP )
505 m_aOutPut.open( rFileName.getStr(),
506 std::ios_base::out | std::ios_base::app );
507 m_bIsAfterHeader = m_aOutPut.tellp() != std::ofstream::pos_type( 0 );
511 void PoOfstream::close()
513 assert( isOpen() );
514 m_aOutPut.close();
517 void PoOfstream::writeHeader(const PoHeader& rPoHeader)
519 assert( isOpen() && !m_bIsAfterHeader && rPoHeader.m_bIsInitialized );
520 rPoHeader.m_pGenPo->writeToFile( m_aOutPut );
521 m_bIsAfterHeader = true;
524 void PoOfstream::writeEntry( const PoEntry& rPoEntry )
526 assert( isOpen() && m_bIsAfterHeader && rPoEntry.m_bIsInitialized );
527 rPoEntry.m_pGenPo->writeToFile( m_aOutPut );
530 namespace
533 // Check the validity of read entry
534 bool lcl_CheckInputEntry(const GenPoEntry& rEntry)
536 // stock button labels don't have a reference/sourcefile - they are not extracted from ui files
537 // (explicitly skipped by solenv/bin/uiex) but instead inserted by l10ntools/source/localize.cxx
538 // into all module templates (see d5d905b480c2a9b1db982f2867e87b5c230d1ab9)
539 return !rEntry.getMsgCtxt().isEmpty() &&
540 (rEntry.getMsgCtxt() == "stock" || !rEntry.getReference().empty()) &&
541 !rEntry.getMsgId().isEmpty();
546 PoIfstream::PoIfstream()
547 : m_bEof( false )
551 PoIfstream::PoIfstream(const OString& rFileName)
552 : m_bEof( false )
554 open( rFileName );
557 PoIfstream::~PoIfstream()
559 if( isOpen() )
561 close();
565 void PoIfstream::open( const OString& rFileName, OString& rPoHeader )
567 assert( !isOpen() );
568 m_aInPut.open( rFileName.getStr(), std::ios_base::in );
570 // capture header, updating timestamp and generator
571 std::string sTemp;
572 std::getline(m_aInPut,sTemp);
573 while( !sTemp.empty() && !m_aInPut.eof() )
575 std::getline(m_aInPut,sTemp);
576 OString sLine(sTemp.data(),sTemp.length());
577 if (sLine.startsWith("\"PO-Revision-Date"))
578 rPoHeader += "PO-Revision-Date: " + lcl_GetTime() + "\n";
579 else if (sLine.startsWith("\"X-Generator"))
580 rPoHeader += "X-Generator: LibreOffice\n";
581 else if (sLine.startsWith("\""))
582 rPoHeader += lcl_GenNormString(sLine);
584 m_bEof = false;
587 void PoIfstream::open( const OString& rFileName )
589 assert( !isOpen() );
590 m_aInPut.open( rFileName.getStr(), std::ios_base::in );
592 // Skip header
593 std::string sTemp;
594 std::getline(m_aInPut,sTemp);
595 while( !sTemp.empty() && !m_aInPut.eof() )
597 std::getline(m_aInPut,sTemp);
599 m_bEof = false;
602 void PoIfstream::close()
604 assert( isOpen() );
605 m_aInPut.close();
608 void PoIfstream::readEntry( PoEntry& rPoEntry )
610 assert( isOpen() && !eof() );
611 GenPoEntry aGenPo;
612 aGenPo.readFromFile( m_aInPut );
613 if( aGenPo.isNull() )
615 m_bEof = true;
616 rPoEntry = PoEntry();
618 else
620 if( lcl_CheckInputEntry(aGenPo) )
622 if( rPoEntry.m_pGenPo )
624 *(rPoEntry.m_pGenPo) = aGenPo;
626 else
628 rPoEntry.m_pGenPo.reset( new GenPoEntry( aGenPo ) );
630 rPoEntry.m_bIsInitialized = true;
632 else
634 SAL_WARN("l10ntools", "Parse problem with entry: " << aGenPo.getMsgStr());
635 throw PoIfstream::Exception();
640 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */