tdf#42893: sw_autocorrect: Add unittest
[LibreOffice.git] / linguistic / source / gciterator.cxx
bloba3a18ddecaac6be4fdac569dccada9ed7d632886
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/macros.h>
21 #include <com/sun/star/beans/XPropertySet.hpp>
22 #include <com/sun/star/container/ElementExistException.hpp>
23 #include <com/sun/star/container/XNameAccess.hpp>
24 #include <com/sun/star/configuration/theDefaultProvider.hpp>
25 #include <com/sun/star/i18n/BreakIterator.hpp>
26 #include <com/sun/star/lang/IndexOutOfBoundsException.hpp>
27 #include <com/sun/star/lang/XComponent.hpp>
28 #include <com/sun/star/lang/XServiceInfo.hpp>
29 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
30 #include <com/sun/star/linguistic2/XDictionary.hpp>
31 #include <com/sun/star/linguistic2/XSupportedLocales.hpp>
32 #include <com/sun/star/linguistic2/XProofreader.hpp>
33 #include <com/sun/star/linguistic2/XProofreadingIterator.hpp>
34 #include <com/sun/star/linguistic2/SingleProofreadingError.hpp>
35 #include <com/sun/star/linguistic2/ProofreadingResult.hpp>
36 #include <com/sun/star/linguistic2/LinguServiceEvent.hpp>
37 #include <com/sun/star/linguistic2/LinguServiceEventFlags.hpp>
38 #include <com/sun/star/text/TextMarkupType.hpp>
39 #include <com/sun/star/text/TextMarkupDescriptor.hpp>
40 #include <com/sun/star/text/XMultiTextMarkup.hpp>
41 #include <com/sun/star/text/XFlatParagraph.hpp>
42 #include <com/sun/star/text/XFlatParagraphIterator.hpp>
43 #include <com/sun/star/uno/XComponentContext.hpp>
45 #include <sal/config.h>
46 #include <sal/log.hxx>
47 #include <o3tl/safeint.hxx>
48 #include <osl/conditn.hxx>
49 #include <cppuhelper/supportsservice.hxx>
50 #include <cppuhelper/weak.hxx>
51 #include <i18nlangtag/languagetag.hxx>
52 #include <comphelper/processfactory.hxx>
53 #include <comphelper/propertysequence.hxx>
54 #include <tools/debug.hxx>
55 #include <comphelper/diagnose_ex.hxx>
57 #include <map>
59 #include <linguistic/misc.hxx>
61 #include "gciterator.hxx"
63 using namespace linguistic;
64 using namespace ::com::sun::star;
66 // white space list: obtained from the fonts.config.txt of a Linux system.
67 const sal_Unicode aWhiteSpaces[] =
69 0x0020, /* SPACE */
70 0x00a0, /* NO-BREAK SPACE */
71 0x00ad, /* SOFT HYPHEN */
72 0x115f, /* HANGUL CHOSEONG FILLER */
73 0x1160, /* HANGUL JUNGSEONG FILLER */
74 0x1680, /* OGHAM SPACE MARK */
75 0x2000, /* EN QUAD */
76 0x2001, /* EM QUAD */
77 0x2002, /* EN SPACE */
78 0x2003, /* EM SPACE */
79 0x2004, /* THREE-PER-EM SPACE */
80 0x2005, /* FOUR-PER-EM SPACE */
81 0x2006, /* SIX-PER-EM SPACE */
82 0x2007, /* FIGURE SPACE */
83 0x2008, /* PUNCTUATION SPACE */
84 0x2009, /* THIN SPACE */
85 0x200a, /* HAIR SPACE */
86 0x200b, /* ZERO WIDTH SPACE */
87 0x200c, /* ZERO WIDTH NON-JOINER */
88 0x200d, /* ZERO WIDTH JOINER */
89 0x200e, /* LEFT-TO-RIGHT MARK */
90 0x200f, /* RIGHT-TO-LEFT MARK */
91 0x2028, /* LINE SEPARATOR */
92 0x2029, /* PARAGRAPH SEPARATOR */
93 0x202a, /* LEFT-TO-RIGHT EMBEDDING */
94 0x202b, /* RIGHT-TO-LEFT EMBEDDING */
95 0x202c, /* POP DIRECTIONAL FORMATTING */
96 0x202d, /* LEFT-TO-RIGHT OVERRIDE */
97 0x202e, /* RIGHT-TO-LEFT OVERRIDE */
98 0x202f, /* NARROW NO-BREAK SPACE */
99 0x205f, /* MEDIUM MATHEMATICAL SPACE */
100 0x2060, /* WORD JOINER */
101 0x2061, /* FUNCTION APPLICATION */
102 0x2062, /* INVISIBLE TIMES */
103 0x2063, /* INVISIBLE SEPARATOR */
104 0x206A, /* INHIBIT SYMMETRIC SWAPPING */
105 0x206B, /* ACTIVATE SYMMETRIC SWAPPING */
106 0x206C, /* INHIBIT ARABIC FORM SHAPING */
107 0x206D, /* ACTIVATE ARABIC FORM SHAPING */
108 0x206E, /* NATIONAL DIGIT SHAPES */
109 0x206F, /* NOMINAL DIGIT SHAPES */
110 0x3000, /* IDEOGRAPHIC SPACE */
111 0x3164, /* HANGUL FILLER */
112 0xfeff, /* ZERO WIDTH NO-BREAK SPACE */
113 0xffa0, /* HALFWIDTH HANGUL FILLER */
114 0xfff9, /* INTERLINEAR ANNOTATION ANCHOR */
115 0xfffa, /* INTERLINEAR ANNOTATION SEPARATOR */
116 0xfffb /* INTERLINEAR ANNOTATION TERMINATOR */
119 // Information about reason for proofreading (ProofInfo)
120 const sal_Int32 PROOFINFO_GET_PROOFRESULT = 1;
121 const sal_Int32 PROOFINFO_MARK_PARAGRAPH = 2;
123 const int nWhiteSpaces = SAL_N_ELEMENTS( aWhiteSpaces );
125 static bool lcl_IsWhiteSpace( sal_Unicode cChar )
127 bool bFound = false;
128 for (int i = 0; i < nWhiteSpaces && !bFound; ++i)
130 if (cChar == aWhiteSpaces[i])
131 bFound = true;
133 return bFound;
136 static sal_Int32 lcl_SkipWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
138 // note having nStartPos point right behind the string is OK since that one
139 // is a correct end-of-sentence position to be returned from a grammar checker...
141 const sal_Int32 nLen = rText.getLength();
142 bool bIllegalArgument = false;
143 if (nStartPos < 0)
145 bIllegalArgument = true;
146 nStartPos = 0;
148 if (nStartPos > nLen)
150 bIllegalArgument = true;
151 nStartPos = nLen;
153 if (bIllegalArgument)
155 SAL_WARN( "linguistic", "lcl_SkipWhiteSpaces: illegal arguments" );
158 sal_Int32 nRes = nStartPos;
159 if (0 <= nStartPos && nStartPos < nLen)
161 const sal_Unicode* const pEnd = rText.getStr() + nLen;
162 const sal_Unicode *pText = rText.getStr() + nStartPos;
163 while (pText != pEnd && lcl_IsWhiteSpace(*pText))
164 ++pText;
165 nRes = pText - rText.getStr();
168 DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_SkipWhiteSpaces return value out of range" );
169 return nRes;
172 static sal_Int32 lcl_BacktraceWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
174 // note: having nStartPos point right behind the string is OK since that one
175 // is a correct end-of-sentence position to be returned from a grammar checker...
177 const sal_Int32 nLen = rText.getLength();
178 bool bIllegalArgument = false;
179 if (nStartPos < 0)
181 bIllegalArgument = true;
182 nStartPos = 0;
184 if (nStartPos > nLen)
186 bIllegalArgument = true;
187 nStartPos = nLen;
189 if (bIllegalArgument)
191 SAL_WARN( "linguistic", "lcl_BacktraceWhiteSpaces: illegal arguments" );
194 sal_Int32 nRes = nStartPos;
195 sal_Int32 nPosBefore = nStartPos - 1;
196 const sal_Unicode *pStart = rText.getStr();
197 if (0 <= nPosBefore && nPosBefore < nLen && lcl_IsWhiteSpace( pStart[ nPosBefore ] ))
199 nStartPos = nPosBefore;
200 const sal_Unicode *pText = rText.getStr() + nStartPos;
201 while (pText > pStart && lcl_IsWhiteSpace( *pText ))
202 --pText;
203 // now add 1 since we want to point to the first char after the last char in the sentence...
204 nRes = pText - pStart + 1;
207 DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_BacktraceWhiteSpaces return value out of range" );
208 return nRes;
212 extern "C" {
214 static void lcl_workerfunc (void * gci)
216 osl_setThreadName("GrammarCheckingIterator");
218 static_cast<GrammarCheckingIterator*>(gci)->DequeueAndCheck();
223 static lang::Locale lcl_GetPrimaryLanguageOfSentence(
224 const uno::Reference< text::XFlatParagraph >& xFlatPara,
225 sal_Int32 nStartIndex )
227 //get the language of the first word
228 return xFlatPara->getLanguageOfText( nStartIndex, 1 );
232 LngXStringKeyMap::LngXStringKeyMap() {}
234 void SAL_CALL LngXStringKeyMap::insertValue(const OUString& aKey, const css::uno::Any& aValue)
236 std::map<OUString, css::uno::Any>::const_iterator aIter = maMap.find(aKey);
237 if (aIter != maMap.end())
238 throw css::container::ElementExistException();
240 maMap[aKey] = aValue;
243 css::uno::Any SAL_CALL LngXStringKeyMap::getValue(const OUString& aKey)
245 std::map<OUString, css::uno::Any>::const_iterator aIter = maMap.find(aKey);
246 if (aIter == maMap.end())
247 throw css::container::NoSuchElementException();
249 return (*aIter).second;
252 sal_Bool SAL_CALL LngXStringKeyMap::hasValue(const OUString& aKey)
254 return maMap.find(aKey) != maMap.end();
257 ::sal_Int32 SAL_CALL LngXStringKeyMap::getCount() { return maMap.size(); }
259 OUString SAL_CALL LngXStringKeyMap::getKeyByIndex(::sal_Int32 nIndex)
261 if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size())
262 throw css::lang::IndexOutOfBoundsException();
264 return OUString();
267 css::uno::Any SAL_CALL LngXStringKeyMap::getValueByIndex(::sal_Int32 nIndex)
269 if (nIndex < 0 || o3tl::make_unsigned(nIndex) >= maMap.size())
270 throw css::lang::IndexOutOfBoundsException();
272 return css::uno::Any();
276 osl::Mutex& GrammarCheckingIterator::MyMutex()
278 static osl::Mutex SINGLETON;
279 return SINGLETON;
282 GrammarCheckingIterator::GrammarCheckingIterator() :
283 m_bEnd( false ),
284 m_bGCServicesChecked( false ),
285 m_nDocIdCounter( 0 ),
286 m_thread(nullptr),
287 m_aEventListeners( MyMutex() ),
288 m_aNotifyListeners( MyMutex() )
293 GrammarCheckingIterator::~GrammarCheckingIterator()
295 TerminateThread();
298 void GrammarCheckingIterator::TerminateThread()
300 oslThread t;
302 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
303 t = m_thread;
304 m_thread = nullptr;
305 m_bEnd = true;
306 m_aWakeUpThread.set();
308 if (t != nullptr)
310 osl_joinWithThread(t);
311 osl_destroyThread(t);
316 bool GrammarCheckingIterator::joinThreads()
318 TerminateThread();
319 return true;
323 sal_Int32 GrammarCheckingIterator::NextDocId()
325 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
326 m_nDocIdCounter += 1;
327 return m_nDocIdCounter;
331 OUString GrammarCheckingIterator::GetOrCreateDocId(
332 const uno::Reference< lang::XComponent > &xComponent )
334 // internal method; will always be called with locked mutex
336 OUString aRes;
337 if (xComponent.is())
339 if (m_aDocIdMap.find( xComponent.get() ) != m_aDocIdMap.end())
341 // return already existing entry
342 aRes = m_aDocIdMap[ xComponent.get() ];
344 else // add new entry
346 sal_Int32 nRes = NextDocId();
347 aRes = OUString::number( nRes );
348 m_aDocIdMap[ xComponent.get() ] = aRes;
349 xComponent->addEventListener( this );
352 return aRes;
356 void GrammarCheckingIterator::AddEntry(
357 const uno::Reference< text::XFlatParagraphIterator >& xFlatParaIterator,
358 const uno::Reference< text::XFlatParagraph >& xFlatPara,
359 const OUString & rDocId,
360 sal_Int32 nStartIndex,
361 bool bAutomatic )
363 // we may not need/have a xFlatParaIterator (e.g. if checkGrammarAtPos was called)
364 // but we always need a xFlatPara...
365 if (!xFlatPara.is())
366 return;
368 FPEntry aNewFPEntry;
369 aNewFPEntry.m_xParaIterator = xFlatParaIterator;
370 aNewFPEntry.m_xPara = xFlatPara;
371 aNewFPEntry.m_aDocId = rDocId;
372 aNewFPEntry.m_nStartIndex = nStartIndex;
373 aNewFPEntry.m_bAutomatic = bAutomatic;
375 // add new entry to the end of this queue
376 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
377 if (!m_thread)
378 m_thread = osl_createThread( lcl_workerfunc, this );
379 m_aFPEntriesQueue.push_back( aNewFPEntry );
381 // wake up the thread in order to do grammar checking
382 m_aWakeUpThread.set();
386 void GrammarCheckingIterator::ProcessResult(
387 const linguistic2::ProofreadingResult &rRes,
388 const uno::Reference< text::XFlatParagraphIterator > &rxFlatParagraphIterator,
389 bool bIsAutomaticChecking )
391 DBG_ASSERT( rRes.xFlatParagraph.is(), "xFlatParagraph is missing" );
392 //no guard necessary as no members are used
393 bool bContinueWithNextPara = false;
394 if (!rRes.xFlatParagraph.is() || rRes.xFlatParagraph->isModified())
396 // if paragraph was modified/deleted meanwhile continue with the next one...
397 bContinueWithNextPara = true;
399 else // paragraph is still unchanged...
401 // mark found errors...
403 sal_Int32 nTextLen = rRes.aText.getLength();
404 bool bBoundariesOk = 0 <= rRes.nStartOfSentencePosition && rRes.nStartOfSentencePosition <= nTextLen &&
405 0 <= rRes.nBehindEndOfSentencePosition && rRes.nBehindEndOfSentencePosition <= nTextLen &&
406 0 <= rRes.nStartOfNextSentencePosition && rRes.nStartOfNextSentencePosition <= nTextLen &&
407 rRes.nStartOfSentencePosition <= rRes.nBehindEndOfSentencePosition &&
408 rRes.nBehindEndOfSentencePosition <= rRes.nStartOfNextSentencePosition;
409 DBG_ASSERT( bBoundariesOk, "inconsistent sentence boundaries" );
411 uno::Reference< text::XMultiTextMarkup > xMulti( rRes.xFlatParagraph, uno::UNO_QUERY );
412 if (xMulti.is()) // use new API for markups
416 // length = number of found errors + 1 sentence markup
417 sal_Int32 nErrors = rRes.aErrors.getLength();
418 uno::Sequence< text::TextMarkupDescriptor > aDescriptors( nErrors + 1 );
419 text::TextMarkupDescriptor * pDescriptors = aDescriptors.getArray();
421 uno::Reference< linguistic2::XDictionary > xIgnoreAll = ::GetIgnoreAllList();
422 sal_Int32 ignoredCount = 0;
424 // at pos 0 .. nErrors-1 -> all grammar errors
425 for (const linguistic2::SingleProofreadingError &rError : rRes.aErrors)
427 OUString word(rRes.aText.subView(rError.nErrorStart, rError.nErrorLength));
428 bool ignored = xIgnoreAll.is() && xIgnoreAll->getEntry(word).is();
430 if (!ignored)
432 text::TextMarkupDescriptor &rDesc = *pDescriptors++;
434 rDesc.nType = rError.nErrorType;
435 rDesc.nOffset = rError.nErrorStart;
436 rDesc.nLength = rError.nErrorLength;
438 // the proofreader may return SPELLING but right now our core
439 // does only handle PROOFREADING if the result is from the proofreader...
440 // (later on we may wish to color spelling errors found by the proofreader
441 // differently for example. But no special handling right now.
442 if (rDesc.nType == text::TextMarkupType::SPELLCHECK)
443 rDesc.nType = text::TextMarkupType::PROOFREADING;
445 uno::Reference< container::XStringKeyMap > xKeyMap(new LngXStringKeyMap());
446 for( const beans::PropertyValue& rProperty : rError.aProperties )
448 if ( rProperty.Name == "LineColor" )
450 xKeyMap->insertValue(rProperty.Name, rProperty.Value);
451 rDesc.xMarkupInfoContainer = xKeyMap;
453 else if ( rProperty.Name == "LineType" )
455 xKeyMap->insertValue(rProperty.Name, rProperty.Value);
456 rDesc.xMarkupInfoContainer = xKeyMap;
460 else
461 ignoredCount++;
464 if (ignoredCount != 0)
466 aDescriptors.realloc(aDescriptors.getLength() - ignoredCount);
467 pDescriptors = aDescriptors.getArray();
468 pDescriptors += aDescriptors.getLength() - 1;
471 // at pos nErrors -> sentence markup
472 // nSentenceLength: includes the white-spaces following the sentence end...
473 const sal_Int32 nSentenceLength = rRes.nStartOfNextSentencePosition - rRes.nStartOfSentencePosition;
474 pDescriptors->nType = text::TextMarkupType::SENTENCE;
475 pDescriptors->nOffset = rRes.nStartOfSentencePosition;
476 pDescriptors->nLength = nSentenceLength;
478 xMulti->commitMultiTextMarkup( aDescriptors ) ;
480 catch (lang::IllegalArgumentException &)
482 TOOLS_WARN_EXCEPTION( "linguistic", "commitMultiTextMarkup" );
486 // other sentences left to be checked in this paragraph?
487 if (rRes.nStartOfNextSentencePosition < rRes.aText.getLength())
489 AddEntry( rxFlatParagraphIterator, rRes.xFlatParagraph, rRes.aDocumentIdentifier, rRes.nStartOfNextSentencePosition, bIsAutomaticChecking );
491 else // current paragraph finished
493 // set "already checked" flag for the current flat paragraph
494 if (rRes.xFlatParagraph.is())
495 rRes.xFlatParagraph->setChecked( text::TextMarkupType::PROOFREADING, true );
497 bContinueWithNextPara = true;
501 if (bContinueWithNextPara)
503 // we need to continue with the next paragraph
504 if (rxFlatParagraphIterator.is())
505 AddEntry(rxFlatParagraphIterator, rxFlatParagraphIterator->getNextPara(),
506 rRes.aDocumentIdentifier, 0, bIsAutomaticChecking);
511 std::pair<OUString, std::optional<OUString>>
512 GrammarCheckingIterator::getServiceForLocale(const lang::Locale& rLocale) const
514 if (!rLocale.Language.isEmpty())
516 const OUString sBcp47 = LanguageTag::convertToBcp47(rLocale, false);
517 GCImplNames_t::const_iterator aLangIt(m_aGCImplNamesByLang.find(sBcp47));
518 if (aLangIt != m_aGCImplNamesByLang.end())
519 return { aLangIt->second, {} };
521 for (const auto& sFallbackBcp47 : LanguageTag(rLocale).getFallbackStrings(false))
523 aLangIt = m_aGCImplNamesByLang.find(sFallbackBcp47);
524 if (aLangIt != m_aGCImplNamesByLang.end())
525 return { aLangIt->second, sFallbackBcp47 };
529 return {};
533 uno::Reference< linguistic2::XProofreader > GrammarCheckingIterator::GetGrammarChecker(
534 lang::Locale &rLocale )
536 uno::Reference< linguistic2::XProofreader > xRes;
538 // ---- THREAD SAFE START ----
539 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
541 // check supported locales for each grammarchecker if not already done
542 if (!m_bGCServicesChecked)
544 GetConfiguredGCSvcs_Impl();
545 m_bGCServicesChecked = true;
548 if (const auto& [aSvcImplName, oFallbackBcp47] = getServiceForLocale(rLocale);
549 !aSvcImplName.isEmpty()) // matching configured language found?
551 if (oFallbackBcp47)
552 rLocale = LanguageTag::convertToLocale(*oFallbackBcp47, false);
553 GCReferences_t::const_iterator aImplNameIt( m_aGCReferencesByService.find( aSvcImplName ) );
554 if (aImplNameIt != m_aGCReferencesByService.end()) // matching impl name found?
556 xRes = aImplNameIt->second;
558 else // the service is to be instantiated here for the first time...
562 uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() );
563 uno::Reference< linguistic2::XProofreader > xGC(
564 xContext->getServiceManager()->createInstanceWithContext(aSvcImplName, xContext),
565 uno::UNO_QUERY_THROW );
566 uno::Reference< linguistic2::XSupportedLocales > xSuppLoc( xGC, uno::UNO_QUERY_THROW );
568 if (xSuppLoc->hasLocale( rLocale ))
570 m_aGCReferencesByService[ aSvcImplName ] = xGC;
571 xRes = xGC;
573 uno::Reference< linguistic2::XLinguServiceEventBroadcaster > xBC( xGC, uno::UNO_QUERY );
574 if (xBC.is())
575 xBC->addLinguServiceEventListener( this );
577 else
579 SAL_WARN( "linguistic", "grammar checker does not support required locale" );
582 catch (uno::Exception &)
584 SAL_WARN( "linguistic", "instantiating grammar checker failed" );
588 else // not found - quite normal
590 SAL_INFO("linguistic", "No grammar checker found for \""
591 << LanguageTag::convertToBcp47(rLocale, false) << "\"");
593 // ---- THREAD SAFE END ----
595 return xRes;
598 static uno::Sequence<beans::PropertyValue>
599 lcl_makeProperties(uno::Reference<text::XFlatParagraph> const& xFlatPara, sal_Int32 nProofInfo)
601 uno::Reference<beans::XPropertySet> const xProps(
602 xFlatPara, uno::UNO_QUERY_THROW);
603 css::uno::Any a (nProofInfo);
604 return comphelper::InitPropertySequence({
605 { "FieldPositions", xProps->getPropertyValue(u"FieldPositions"_ustr) },
606 { "FootnotePositions", xProps->getPropertyValue(u"FootnotePositions"_ustr) },
607 { "SortedTextId", xProps->getPropertyValue(u"SortedTextId"_ustr) },
608 { "DocumentElementsCount", xProps->getPropertyValue(u"DocumentElementsCount"_ustr) },
609 { "ProofInfo", a }
613 void GrammarCheckingIterator::DequeueAndCheck()
615 for (;;)
617 // ---- THREAD SAFE START ----
618 bool bQueueEmpty = false;
620 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
621 if (m_bEnd)
623 break;
625 bQueueEmpty = m_aFPEntriesQueue.empty();
627 // ---- THREAD SAFE END ----
629 if (!bQueueEmpty)
631 uno::Reference< text::XFlatParagraphIterator > xFPIterator;
632 uno::Reference< text::XFlatParagraph > xFlatPara;
633 FPEntry aFPEntryItem;
634 OUString aCurDocId;
635 // ---- THREAD SAFE START ----
637 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
638 aFPEntryItem = m_aFPEntriesQueue.front();
639 xFPIterator = aFPEntryItem.m_xParaIterator;
640 xFlatPara = aFPEntryItem.m_xPara;
641 m_aCurCheckedDocId = aFPEntryItem.m_aDocId;
642 aCurDocId = m_aCurCheckedDocId;
644 m_aFPEntriesQueue.pop_front();
646 // ---- THREAD SAFE END ----
648 if (xFlatPara.is() && xFPIterator.is())
652 OUString aCurTxt( xFlatPara->getText() );
653 lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, aFPEntryItem.m_nStartIndex );
655 const bool bModified = xFlatPara->isModified();
656 if (!bModified)
658 linguistic2::ProofreadingResult aRes;
660 // ---- THREAD SAFE START ----
662 osl::ClearableMutexGuard aGuard(MyMutex());
664 sal_Int32 nStartPos = aFPEntryItem.m_nStartIndex;
665 sal_Int32 nSuggestedEnd
666 = GetSuggestedEndOfSentence(aCurTxt, nStartPos, aCurLocale);
667 DBG_ASSERT((nSuggestedEnd == 0 && aCurTxt.isEmpty())
668 || nSuggestedEnd > nStartPos,
669 "nSuggestedEndOfSentencePos calculation failed?");
671 uno::Reference<linguistic2::XProofreader> xGC =
672 GetGrammarChecker(aCurLocale);
673 if (xGC.is())
675 aGuard.clear();
676 uno::Sequence<beans::PropertyValue> const aProps(
677 lcl_makeProperties(xFlatPara, PROOFINFO_MARK_PARAGRAPH));
678 aRes = xGC->doProofreading(aCurDocId, aCurTxt, aCurLocale,
679 nStartPos, nSuggestedEnd, aProps);
681 //!! work-around to prevent looping if the grammar checker
682 //!! failed to properly identify the sentence end
683 if (aRes.nBehindEndOfSentencePosition <= nStartPos
684 && aRes.nBehindEndOfSentencePosition != nSuggestedEnd)
686 SAL_WARN(
687 "linguistic",
688 "!! Grammarchecker failed to provide end of sentence !!");
689 aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
692 aRes.xFlatParagraph = xFlatPara;
693 aRes.nStartOfSentencePosition = nStartPos;
695 else
697 // no grammar checker -> no error
698 // but we need to provide the data below in order to continue with the next sentence
699 aRes.aDocumentIdentifier = aCurDocId;
700 aRes.xFlatParagraph = xFlatPara;
701 aRes.aText = aCurTxt;
702 aRes.aLocale = aCurLocale;
703 aRes.nStartOfSentencePosition = nStartPos;
704 aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
706 aRes.nStartOfNextSentencePosition
707 = lcl_SkipWhiteSpaces(aCurTxt, aRes.nBehindEndOfSentencePosition);
708 aRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces(
709 aCurTxt, aRes.nStartOfNextSentencePosition);
711 //guard has to be cleared as ProcessResult calls out of this class
713 // ---- THREAD SAFE END ----
714 ProcessResult( aRes, xFPIterator, aFPEntryItem.m_bAutomatic );
716 else
718 // the paragraph changed meanwhile... (and maybe is still edited)
719 // thus we simply continue to ask for the next to be checked.
720 uno::Reference< text::XFlatParagraph > xFlatParaNext( xFPIterator->getNextPara() );
721 AddEntry( xFPIterator, xFlatParaNext, aCurDocId, 0, aFPEntryItem.m_bAutomatic );
724 catch (css::uno::Exception &)
726 TOOLS_WARN_EXCEPTION("linguistic", "GrammarCheckingIterator::DequeueAndCheck ignoring");
730 // ---- THREAD SAFE START ----
732 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
733 m_aCurCheckedDocId.clear();
735 // ---- THREAD SAFE END ----
737 else
739 // ---- THREAD SAFE START ----
741 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
742 if (m_bEnd)
744 break;
746 // Check queue state again
747 if (m_aFPEntriesQueue.empty())
748 m_aWakeUpThread.reset();
750 // ---- THREAD SAFE END ----
752 //if the queue is empty
753 // IMPORTANT: Don't call condition.wait() with locked
754 // mutex. Otherwise you would keep out other threads
755 // to add entries to the queue! A condition is thread-
756 // safe implemented.
757 m_aWakeUpThread.wait();
763 void SAL_CALL GrammarCheckingIterator::startProofreading(
764 const uno::Reference< ::uno::XInterface > & xDoc,
765 const uno::Reference< text::XFlatParagraphIteratorProvider > & xIteratorProvider )
767 // get paragraph to start checking with
768 const bool bAutomatic = true;
769 uno::Reference<text::XFlatParagraphIterator> xFPIterator = xIteratorProvider->getFlatParagraphIterator(
770 text::TextMarkupType::PROOFREADING, bAutomatic );
771 uno::Reference< text::XFlatParagraph > xPara( xFPIterator.is()? xFPIterator->getFirstPara() : nullptr );
772 uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
774 // ---- THREAD SAFE START ----
775 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
776 if (xPara.is() && xComponent.is())
778 OUString aDocId = GetOrCreateDocId( xComponent );
780 // create new entry and add it to queue
781 AddEntry( xFPIterator, xPara, aDocId, 0, bAutomatic );
783 // ---- THREAD SAFE END ----
787 linguistic2::ProofreadingResult SAL_CALL GrammarCheckingIterator::checkSentenceAtPosition(
788 const uno::Reference< uno::XInterface >& xDoc,
789 const uno::Reference< text::XFlatParagraph >& xFlatPara,
790 const OUString& rText,
791 const lang::Locale&,
792 sal_Int32 nStartOfSentencePos,
793 sal_Int32 nSuggestedEndOfSentencePos,
794 sal_Int32 nErrorPosInPara )
796 // for the context menu...
798 uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
799 const bool bDoCheck = (xFlatPara.is() && xComponent.is() &&
800 ( nErrorPosInPara < 0 || nErrorPosInPara < rText.getLength()));
802 if (!bDoCheck)
803 return linguistic2::ProofreadingResult();
805 // iterate through paragraph until we find the sentence we are interested in
806 linguistic2::ProofreadingResult aTmpRes;
807 sal_Int32 nStartPos = nStartOfSentencePos >= 0 ? nStartOfSentencePos : 0;
809 bool bFound = false;
812 lang::Locale aCurLocale = lcl_GetPrimaryLanguageOfSentence( xFlatPara, nStartPos );
813 sal_Int32 nOldStartOfSentencePos = nStartPos;
814 uno::Reference< linguistic2::XProofreader > xGC;
815 OUString aDocId;
817 // ---- THREAD SAFE START ----
819 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
820 aDocId = GetOrCreateDocId( xComponent );
821 nSuggestedEndOfSentencePos = GetSuggestedEndOfSentence( rText, nStartPos, aCurLocale );
822 DBG_ASSERT( nSuggestedEndOfSentencePos > nStartPos, "nSuggestedEndOfSentencePos calculation failed?" );
824 xGC = GetGrammarChecker( aCurLocale );
826 // ---- THREAD SAFE START ----
827 sal_Int32 nEndPos = -1;
828 if (xGC.is())
830 uno::Sequence<beans::PropertyValue> const aProps(
831 lcl_makeProperties(xFlatPara, PROOFINFO_GET_PROOFRESULT));
832 aTmpRes = xGC->doProofreading( aDocId, rText,
833 aCurLocale, nStartPos, nSuggestedEndOfSentencePos, aProps );
835 //!! work-around to prevent looping if the grammar checker
836 //!! failed to properly identify the sentence end
837 if (aTmpRes.nBehindEndOfSentencePosition <= nStartPos)
839 SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!" );
840 aTmpRes.nBehindEndOfSentencePosition = nSuggestedEndOfSentencePos;
843 aTmpRes.xFlatParagraph = xFlatPara;
844 aTmpRes.nStartOfSentencePosition = nStartPos;
845 nEndPos = aTmpRes.nBehindEndOfSentencePosition;
847 if ((nErrorPosInPara< 0 || nStartPos <= nErrorPosInPara) && nErrorPosInPara < nEndPos)
848 bFound = true;
850 if (nEndPos == -1) // no result from grammar checker
851 nEndPos = nSuggestedEndOfSentencePos;
852 nStartPos = lcl_SkipWhiteSpaces( rText, nEndPos );
853 aTmpRes.nBehindEndOfSentencePosition = nEndPos;
854 aTmpRes.nStartOfNextSentencePosition = nStartPos;
855 aTmpRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( rText, aTmpRes.nStartOfNextSentencePosition );
857 // prevent endless loop by forcefully advancing if needs be...
858 if (nStartPos <= nOldStartOfSentencePos)
860 SAL_WARN( "linguistic", "end-of-sentence detection failed?" );
861 nStartPos = nOldStartOfSentencePos + 1;
864 while (!bFound && nStartPos < rText.getLength());
866 if (bFound && !xFlatPara->isModified())
867 return aTmpRes;
869 return linguistic2::ProofreadingResult();
872 sal_Int32 GrammarCheckingIterator::GetSuggestedEndOfSentence(
873 const OUString &rText,
874 sal_Int32 nSentenceStartPos,
875 const lang::Locale &rLocale )
877 // internal method; will always be called with locked mutex
879 if (!m_xBreakIterator.is())
881 uno::Reference< uno::XComponentContext > xContext = ::comphelper::getProcessComponentContext();
882 m_xBreakIterator = i18n::BreakIterator::create(xContext);
884 sal_Int32 nTextLen = rText.getLength();
885 sal_Int32 nEndPosition(0);
886 sal_Int32 nTmpStartPos = nSentenceStartPos;
889 sal_Int32 const nPrevEndPosition(nEndPosition);
890 nEndPosition = nTextLen;
891 if (nTmpStartPos < nTextLen)
893 nEndPosition = m_xBreakIterator->endOfSentence( rText, nTmpStartPos, rLocale );
894 if (nEndPosition <= nPrevEndPosition)
896 // fdo#68750 if there's no progress at all then presumably
897 // there's no end of sentence in this paragraph so just
898 // set the end position to end of paragraph
899 nEndPosition = nTextLen;
902 if (nEndPosition < 0)
903 nEndPosition = nTextLen;
905 ++nTmpStartPos;
907 while (nEndPosition <= nSentenceStartPos && nEndPosition < nTextLen);
908 if (nEndPosition > nTextLen)
909 nEndPosition = nTextLen;
910 return nEndPosition;
914 void SAL_CALL GrammarCheckingIterator::resetIgnoreRules( )
916 for (auto const& elem : m_aGCReferencesByService)
918 uno::Reference< linguistic2::XProofreader > xGC(elem.second);
919 if (xGC.is())
920 xGC->resetIgnoreRules();
925 sal_Bool SAL_CALL GrammarCheckingIterator::isProofreading(
926 const uno::Reference< uno::XInterface >& xDoc )
928 // ---- THREAD SAFE START ----
929 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
931 bool bRes = false;
933 uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY );
934 if (xComponent.is())
936 // if the component was already used in one of the two calls to check text
937 // i.e. in startGrammarChecking or checkGrammarAtPos it will be found in the
938 // m_aDocIdMap unless the document already disposed.
939 // If it is not found then it is not yet being checked (or requested to being checked)
940 const DocMap_t::const_iterator aIt( m_aDocIdMap.find( xComponent.get() ) );
941 if (aIt != m_aDocIdMap.end())
943 // check in document is checked automatically in the background...
944 OUString aDocId = aIt->second;
945 if (!m_aCurCheckedDocId.isEmpty() && m_aCurCheckedDocId == aDocId)
947 // an entry for that document was dequeued and is currently being checked.
948 bRes = true;
950 else
952 // we need to check if there is an entry for that document in the queue...
953 // That is the document is going to be checked sooner or later.
955 sal_Int32 nSize = m_aFPEntriesQueue.size();
956 for (sal_Int32 i = 0; i < nSize && !bRes; ++i)
958 if (aDocId == m_aFPEntriesQueue[i].m_aDocId)
959 bRes = true;
964 // ---- THREAD SAFE END ----
966 return bRes;
970 void SAL_CALL GrammarCheckingIterator::processLinguServiceEvent(
971 const linguistic2::LinguServiceEvent& rLngSvcEvent )
973 if (rLngSvcEvent.nEvent != linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN)
974 return;
978 uno::Reference< uno::XInterface > xThis( getXWeak() );
979 linguistic2::LinguServiceEvent aEvent( xThis, linguistic2::LinguServiceEventFlags::PROOFREAD_AGAIN );
980 m_aNotifyListeners.notifyEach(
981 &linguistic2::XLinguServiceEventListener::processLinguServiceEvent,
982 aEvent);
984 catch (uno::RuntimeException &)
986 throw;
988 catch (const ::uno::Exception &)
990 // ignore
991 TOOLS_WARN_EXCEPTION("linguistic", "processLinguServiceEvent");
996 sal_Bool SAL_CALL GrammarCheckingIterator::addLinguServiceEventListener(
997 const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener )
999 if (xListener.is())
1001 m_aNotifyListeners.addInterface( xListener );
1003 return true;
1007 sal_Bool SAL_CALL GrammarCheckingIterator::removeLinguServiceEventListener(
1008 const uno::Reference< linguistic2::XLinguServiceEventListener >& xListener )
1010 if (xListener.is())
1012 m_aNotifyListeners.removeInterface( xListener );
1014 return true;
1018 void SAL_CALL GrammarCheckingIterator::dispose()
1020 lang::EventObject aEvt( static_cast<linguistic2::XProofreadingIterator *>(this) );
1021 m_aEventListeners.disposeAndClear( aEvt );
1023 TerminateThread();
1025 // ---- THREAD SAFE START ----
1027 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
1029 // release all UNO references
1031 m_xBreakIterator.clear();
1033 // clear containers with UNO references AND have those references released
1034 GCReferences_t aTmpEmpty1;
1035 DocMap_t aTmpEmpty2;
1036 FPQueue_t aTmpEmpty3;
1037 m_aGCReferencesByService.swap( aTmpEmpty1 );
1038 m_aDocIdMap.swap( aTmpEmpty2 );
1039 m_aFPEntriesQueue.swap( aTmpEmpty3 );
1041 // ---- THREAD SAFE END ----
1045 void SAL_CALL GrammarCheckingIterator::addEventListener(
1046 const uno::Reference< lang::XEventListener >& xListener )
1048 if (xListener.is())
1050 m_aEventListeners.addInterface( xListener );
1055 void SAL_CALL GrammarCheckingIterator::removeEventListener(
1056 const uno::Reference< lang::XEventListener >& xListener )
1058 if (xListener.is())
1060 m_aEventListeners.removeInterface( xListener );
1065 void SAL_CALL GrammarCheckingIterator::disposing( const lang::EventObject &rSource )
1067 // if the component (document) is disposing release all references
1068 //!! There is no need to remove entries from the queue that are from this document
1069 //!! since the respectives xFlatParagraphs should become invalid (isModified() == true)
1070 //!! and the call to xFlatParagraphIterator->getNextPara() will result in an empty reference.
1071 //!! And if an entry is currently checked by a grammar checker upon return the results
1072 //!! should be ignored.
1073 //!! Also GetOrCreateDocId will not use that very same Id again...
1074 //!! All of the above resulting in that we only have to get rid of the implementation pointer here.
1075 uno::Reference< lang::XComponent > xDoc( rSource.Source, uno::UNO_QUERY );
1076 if (xDoc.is())
1078 // ---- THREAD SAFE START ----
1079 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
1080 m_aDocIdMap.erase( xDoc.get() );
1081 // ---- THREAD SAFE END ----
1086 uno::Reference< util::XChangesBatch > const & GrammarCheckingIterator::GetUpdateAccess() const
1088 if (!m_xUpdateAccess.is())
1092 // get configuration provider
1093 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
1094 uno::Reference< lang::XMultiServiceFactory > xConfigurationProvider =
1095 configuration::theDefaultProvider::get( xContext );
1097 // get configuration update access
1098 beans::PropertyValue aValue;
1099 aValue.Name = "nodepath";
1100 aValue.Value <<= u"org.openoffice.Office.Linguistic/ServiceManager"_ustr;
1101 uno::Sequence< uno::Any > aProps{ uno::Any(aValue) };
1102 m_xUpdateAccess.set(
1103 xConfigurationProvider->createInstanceWithArguments(
1104 u"com.sun.star.configuration.ConfigurationUpdateAccess"_ustr, aProps ),
1105 uno::UNO_QUERY_THROW );
1107 catch (uno::Exception &)
1112 return m_xUpdateAccess;
1116 void GrammarCheckingIterator::GetConfiguredGCSvcs_Impl()
1118 GCImplNames_t aTmpGCImplNamesByLang;
1122 // get node names (locale iso strings) for configured grammar checkers
1123 uno::Reference< container::XNameAccess > xNA( GetUpdateAccess(), uno::UNO_QUERY_THROW );
1124 xNA.set( xNA->getByName( u"GrammarCheckerList"_ustr ), uno::UNO_QUERY_THROW );
1125 const uno::Sequence< OUString > aElementNames( xNA->getElementNames() );
1127 for (const OUString& rElementName : aElementNames)
1129 uno::Sequence< OUString > aImplNames;
1130 uno::Any aTmp( xNA->getByName( rElementName ) );
1131 if (aTmp >>= aImplNames)
1133 if (aImplNames.hasElements())
1135 // only the first entry is used, there should be only one grammar checker per language
1136 const OUString aImplName( aImplNames[0] );
1137 aTmpGCImplNamesByLang[rElementName] = aImplName;
1140 else
1142 SAL_WARN( "linguistic", "failed to get aImplNames. Wrong type?" );
1146 catch (uno::Exception const &)
1148 TOOLS_WARN_EXCEPTION( "linguistic", "exception caught. Failed to get configured services" );
1152 // ---- THREAD SAFE START ----
1153 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
1154 m_aGCImplNamesByLang.swap(aTmpGCImplNamesByLang);
1155 // ---- THREAD SAFE END ----
1160 sal_Bool SAL_CALL GrammarCheckingIterator::supportsService(
1161 const OUString & rServiceName )
1163 return cppu::supportsService(this, rServiceName);
1167 OUString SAL_CALL GrammarCheckingIterator::getImplementationName( )
1169 return u"com.sun.star.lingu2.ProofreadingIterator"_ustr;
1173 uno::Sequence< OUString > SAL_CALL GrammarCheckingIterator::getSupportedServiceNames( )
1175 return { u"com.sun.star.linguistic2.ProofreadingIterator"_ustr };
1179 void GrammarCheckingIterator::SetServiceList(
1180 const lang::Locale &rLocale,
1181 const uno::Sequence< OUString > &rSvcImplNames )
1183 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
1185 OUString sBcp47 = LanguageTag::convertToBcp47(rLocale, false);
1186 OUString aImplName;
1187 if (rSvcImplNames.hasElements())
1188 aImplName = rSvcImplNames[0]; // there is only one grammar checker per language
1190 if (!LinguIsUnspecified(sBcp47) && !sBcp47.isEmpty())
1192 if (!aImplName.isEmpty())
1193 m_aGCImplNamesByLang[sBcp47] = aImplName;
1194 else
1195 m_aGCImplNamesByLang.erase(sBcp47);
1200 uno::Sequence< OUString > GrammarCheckingIterator::GetServiceList(
1201 const lang::Locale &rLocale ) const
1203 ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
1205 const OUString aImplName = getServiceForLocale(rLocale).first; // there is only one grammar checker per language
1207 if (!aImplName.isEmpty())
1208 return { aImplName };
1209 return {};
1213 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
1214 linguistic_GrammarCheckingIterator_get_implementation(
1215 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
1217 return cppu::acquire(new GrammarCheckingIterator());
1222 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */