More usual 1899-12-30 base Date in Basic
[LibreOffice.git] / lingucomponent / source / languageguessing / guesslang.cxx
blobd6d5803a51a7179c58ac0a087657ce6a756520a0
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <iostream>
21 #include <mutex>
22 #include <string_view>
24 #include <osl/file.hxx>
25 #include <tools/debug.hxx>
27 #include <sal/config.h>
28 #include <cppuhelper/factory.hxx>
29 #include <cppuhelper/implbase.hxx>
30 #include <cppuhelper/supportsservice.hxx>
32 #include "simpleguesser.hxx"
33 #include "guess.hxx"
35 #include <com/sun/star/lang/IllegalArgumentException.hpp>
36 #include <com/sun/star/lang/XServiceInfo.hpp>
37 #include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
38 #include <unotools/pathoptions.hxx>
39 #include <osl/thread.h>
41 #include <sal/macros.h>
43 #ifdef SYSTEM_LIBEXTTEXTCAT
44 #include <libexttextcat/textcat.h>
45 #else
46 #include <textcat.h>
47 #endif
49 using namespace ::std;
50 using namespace ::osl;
51 using namespace ::cppu;
52 using namespace ::com::sun::star;
53 using namespace ::com::sun::star::uno;
54 using namespace ::com::sun::star::lang;
55 using namespace ::com::sun::star::linguistic2;
/// Hands out the one mutex shared by all language-guessing calls in this
/// file. The mutex is a function-local static, so it is created the first
/// time this function runs and the same object is returned every time.
static std::mutex & GetLangGuessMutex()
{
    static std::mutex s_aGuessMutex;
    return s_aGuessMutex;
}
63 namespace {
65 class LangGuess_Impl :
66 public ::cppu::WeakImplHelper<
67 XLanguageGuessing,
68 XServiceInfo >
70 SimpleGuesser m_aGuesser;
71 bool m_bInitialized;
73 virtual ~LangGuess_Impl() override {}
74 void EnsureInitialized();
76 public:
77 LangGuess_Impl();
78 LangGuess_Impl(const LangGuess_Impl&) = delete;
79 LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
81 // XServiceInfo implementation
82 virtual OUString SAL_CALL getImplementationName( ) override;
83 virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
84 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override;
86 // XLanguageGuessing implementation
87 virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override;
88 virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
89 virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
90 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override;
91 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override;
92 virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override;
94 // implementation specific
95 /// @throws RuntimeException
96 void SetFingerPrintsDB( std::u16string_view fileName );
101 LangGuess_Impl::LangGuess_Impl() :
102 m_bInitialized( false )
106 void LangGuess_Impl::EnsureInitialized()
108 if (m_bInitialized)
109 return;
111 // set this to true at the very start to prevent loops because of
112 // implicitly called functions below
113 m_bInitialized = true;
115 // set default fingerprint path to where those get installed
116 OUString aPhysPath;
117 OUString aURL( SvtPathOptions().GetFingerprintPath() );
118 osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
119 #ifdef _WIN32
120 aPhysPath += "\\";
121 #else
122 aPhysPath += "/";
123 #endif
125 SetFingerPrintsDB( aPhysPath );
127 #if !defined(EXTTEXTCAT_VERSION_MAJOR)
129 // disable currently not functional languages...
130 struct LangCountry
132 const char *pLang;
133 const char *pCountry;
135 LangCountry aDisable[] =
137 // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
138 // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
139 {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
140 {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
141 {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
143 sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
144 Sequence< Locale > aDisableSeq( nNum );
145 Locale *pDisableSeq = aDisableSeq.getArray();
146 for (sal_Int32 i = 0; i < nNum; ++i)
148 Locale aLocale;
149 aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
150 aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
151 pDisableSeq[i] = aLocale;
153 disableLanguages( aDisableSeq );
154 DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
155 #endif
158 Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
159 const OUString& rText,
160 ::sal_Int32 nStartPos,
161 ::sal_Int32 nLen )
163 std::scoped_lock aGuard( GetLangGuessMutex() );
165 EnsureInitialized();
167 if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength())
168 throw lang::IllegalArgumentException();
170 OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
171 Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
172 lang::Locale aRes;
173 aRes.Language = OUString::createFromAscii( g.GetLanguage().c_str() );
174 aRes.Country = OUString::createFromAscii( g.GetCountry().c_str() );
175 return aRes;
178 #define DEFAULT_CONF_FILE_NAME "fpdb.conf"
180 void LangGuess_Impl::SetFingerPrintsDB(
181 std::u16string_view filePath )
183 //! text encoding for file name / path needs to be in the same encoding the OS uses
184 OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
185 OString conf_file_path = path + DEFAULT_CONF_FILE_NAME;
187 m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr());
190 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
192 std::scoped_lock aGuard( GetLangGuessMutex() );
194 EnsureInitialized();
196 Sequence< css::lang::Locale > aRes;
197 vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
198 aRes.realloc(gs.size());
200 css::lang::Locale *pRes = aRes.getArray();
202 for(size_t i = 0; i < gs.size() ; i++ ){
203 css::lang::Locale current_aRes;
204 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
205 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
206 pRes[i] = current_aRes;
209 return aRes;
212 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
214 std::scoped_lock aGuard( GetLangGuessMutex() );
216 EnsureInitialized();
218 Sequence< css::lang::Locale > aRes;
219 vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
220 aRes.realloc(gs.size());
222 css::lang::Locale *pRes = aRes.getArray();
224 for(size_t i = 0; i < gs.size() ; i++ ){
225 css::lang::Locale current_aRes;
226 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
227 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
228 pRes[i] = current_aRes;
231 return aRes;
234 uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
236 std::scoped_lock aGuard( GetLangGuessMutex() );
238 EnsureInitialized();
240 Sequence< css::lang::Locale > aRes;
241 vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
242 aRes.realloc(gs.size());
244 css::lang::Locale *pRes = aRes.getArray();
246 for(size_t i = 0; i < gs.size() ; i++ ){
247 css::lang::Locale current_aRes;
248 current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage().c_str() );
249 current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry().c_str() );
250 pRes[i] = current_aRes;
253 return aRes;
256 void SAL_CALL LangGuess_Impl::disableLanguages(
257 const uno::Sequence< Locale >& rLanguages )
259 std::scoped_lock aGuard( GetLangGuessMutex() );
261 EnsureInitialized();
263 for (const Locale& rLanguage : rLanguages)
265 string language;
267 OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
268 OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
270 language += l.getStr();
271 language += "-";
272 language += c.getStr();
273 m_aGuesser.DisableLanguage(language);
277 void SAL_CALL LangGuess_Impl::enableLanguages(
278 const uno::Sequence< Locale >& rLanguages )
280 std::scoped_lock aGuard( GetLangGuessMutex() );
282 EnsureInitialized();
284 for (const Locale& rLanguage : rLanguages)
286 string language;
288 OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US );
289 OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US );
291 language += l.getStr();
292 language += "-";
293 language += c.getStr();
294 m_aGuesser.EnableLanguage(language);
298 OUString SAL_CALL LangGuess_Impl::getImplementationName( )
300 return "com.sun.star.lingu2.LanguageGuessing";
303 sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
305 return cppu::supportsService(this, ServiceName);
308 Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
310 return { "com.sun.star.linguistic2.LanguageGuessing" };
313 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
314 lingucomponent_LangGuess_get_implementation(
315 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
317 return cppu::acquire(new LangGuess_Impl());
321 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */