Backed out 4 changesets (bug 1917102) for causing crashes (bug 1917444). CLOSED TREE
[gecko.git] / extensions / spellcheck / hunspell / glue / mozHunspell.cpp
blob1c6a60ae81506b0b62e552a018936cde117cb2aa
1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 * and László Németh (Hunspell). Portions created by the Initial Developers
16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 * David Einstein (deinst@world.std.com)
20 * Michiel van Leeuwen (mvl@exedo.nl)
21 * Caolan McNamara (cmc@openoffice.org)
22 * László Németh (nemethl@gyorsposta.hu)
23 * Davide Prina
24 * Giuseppe Modugno
25 * Gianluca Turconi
26 * Simon Brouwer
27 * Noll Janos
28 * Biro Arpad
29 * Goldman Eleonora
30 * Sarlos Tamas
31 * Bencsath Boldizsar
32 * Halacsy Peter
33 * Dvornik Laszlo
34 * Gefferth Andras
35 * Nagy Viktor
36 * Varga Daniel
37 * Chris Halls
38 * Rene Engelhard
39 * Bram Moolenaar
40 * Dafydd Jones
41 * Harri Pitkanen
42 * Andras Timar
43 * Tor Lillqvist
44 * Jesper Kristensen (mail@jesperkristensen.dk)
46 * Alternatively, the contents of this file may be used under the terms of
47 * either the GNU General Public License Version 2 or later (the "GPL"), or
48 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
49 * in which case the provisions of the GPL or the LGPL are applicable instead
50 * of those above. If you wish to allow use of your version of this file only
51 * under the terms of either the GPL or the LGPL, and not to allow others to
52 * use your version of this file under the terms of the MPL, indicate your
53 * decision by deleting the provisions above and replace them with the notice
54 * and other provisions required by the GPL or the LGPL. If you do not delete
55 * the provisions above, a recipient may use your version of this file under
56 * the terms of any one of the MPL, the GPL or the LGPL.
58 ******* END LICENSE BLOCK *******/
60 #include "mozHunspell.h"
61 #include "nsReadableUtils.h"
62 #include "nsString.h"
63 #include "nsIObserverService.h"
64 #include "nsIDirectoryEnumerator.h"
65 #include "nsIFile.h"
66 #include "nsUnicharUtils.h"
67 #include "nsCRT.h"
68 #include "mozInlineSpellChecker.h"
69 #include "nsIPrefBranch.h"
70 #include "nsIPrefService.h"
71 #include "nsNetUtil.h"
72 #include "prenv.h"
73 #include "mozilla/Components.h"
74 #include "mozilla/Services.h"
75 #include "mozilla/dom/ContentParent_NotifyUpdatedDictionaries.h"
77 #include <stdlib.h>
78 #include <tuple>
80 using namespace mozilla;
// XPCOM boilerplate for mozHunspell.
// Reference counting goes through the cycle-collecting AddRef/Release
// implementations.
82 NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell)
83 NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell)
// Interface map: lists the interfaces mozHunspell exposes
// (mozISpellCheckingEngine, nsIObserver, nsISupportsWeakReference and
// nsIMemoryReporter). nsISupports is resolved through
// mozISpellCheckingEngine because it is declared as ambiguous here.
85 NS_INTERFACE_MAP_BEGIN(mozHunspell)
86 NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine)
87 NS_INTERFACE_MAP_ENTRY(nsIObserver)
88 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
89 NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter)
90 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine)
91 NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell)
92 NS_INTERFACE_MAP_END
// Cycle-collection implementation naming mPersonalDictionary as the member
// it covers. NOTE(review): the "_WEAK" variant presumably also accounts for
// weak-reference support -- confirm against the macro definition.
94 NS_IMPL_CYCLE_COLLECTION_WEAK(mozHunspell, mPersonalDictionary)
96 NS_IMPL_COMPONENT_FACTORY(mozHunspell) {
97 auto hunspell = MakeRefPtr<mozHunspell>();
98 if (NS_SUCCEEDED(hunspell->Init())) {
99 return hunspell.forget().downcast<mozISpellCheckingEngine>();
101 return nullptr;
104 mozHunspell::mozHunspell() {
105 #ifdef DEBUG
106 // There must be only one instance of this class: it reports memory based on
107 // a single static count in HunspellAllocator.
108 static bool hasRun = false;
109 MOZ_ASSERT(!hasRun);
110 hasRun = true;
111 #endif
114 nsresult mozHunspell::Init() {
115 LoadDictionaryList(false);
117 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
118 if (obs) {
119 obs->AddObserver(this, "profile-do-change", true);
120 obs->AddObserver(this, "profile-after-change", true);
123 mozilla::RegisterWeakMemoryReporter(this);
125 return NS_OK;
128 mozHunspell::~mozHunspell() {
129 mozilla::UnregisterWeakMemoryReporter(this);
131 mPersonalDictionary = nullptr;
132 mHunspells.Clear();
135 NS_IMETHODIMP
136 mozHunspell::GetDictionaries(nsTArray<nsCString>& aDictionaries) {
137 MOZ_ASSERT(aDictionaries.IsEmpty());
138 for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) {
139 if (iter.Data().mEnabled) {
140 aDictionaries.AppendElement(iter.Key());
143 return NS_OK;
146 /* Set the Dictionaries.
147 * This also Loads the dictionaries and initializes the converter using the
148 * dictionaries converter
150 NS_IMETHODIMP
151 mozHunspell::SetDictionaries(const nsTArray<nsCString>& aDictionaries) {
152 if (aDictionaries.IsEmpty()) {
153 mHunspells.Clear();
154 return NS_OK;
157 // Disable any dictionaries we've already loaded that we're not
158 // going to use.
159 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) {
160 if (!aDictionaries.Contains(iter.Key())) {
161 iter.Data().mEnabled = false;
165 bool firstDictionary = true;
166 for (const auto& dictionary : aDictionaries) {
167 NS_ConvertUTF8toUTF16 dict(dictionary);
168 nsIURI* affFile = mDictionaries.GetWeak(dict);
169 if (!affFile) {
170 return NS_ERROR_FILE_NOT_FOUND;
173 nsAutoCString affFileName;
174 nsresult rv = affFile->GetSpec(affFileName);
175 NS_ENSURE_SUCCESS(rv, rv);
177 if (auto entry = mHunspells.Lookup(dictionary)) {
178 if (entry.Data().mAffixFileName == affFileName) {
179 entry.Data().mEnabled = true;
180 continue;
184 DictionaryData dictionaryData;
185 dictionaryData.mAffixFileName = affFileName;
187 // Load the first dictionary now, we'll load the others lazily during
188 // checking.
189 if (firstDictionary) {
190 rv = dictionaryData.LoadIfNecessary();
191 NS_ENSURE_SUCCESS(rv, rv);
192 firstDictionary = false;
195 mHunspells.InsertOrUpdate(dictionary, std::move(dictionaryData));
198 // If we have a large number of dictionaries loaded, try freeing any disabled
199 // dictionaries to limit memory use.
200 if (mHunspells.Count() > 10) {
201 mHunspells.RemoveIf([](const auto& iter) { return !iter.Data().mEnabled; });
204 return NS_OK;
207 NS_IMETHODIMP mozHunspell::GetPersonalDictionary(
208 mozIPersonalDictionary** aPersonalDictionary) {
209 *aPersonalDictionary = mPersonalDictionary;
210 NS_IF_ADDREF(*aPersonalDictionary);
211 return NS_OK;
214 NS_IMETHODIMP mozHunspell::SetPersonalDictionary(
215 mozIPersonalDictionary* aPersonalDictionary) {
216 mPersonalDictionary = aPersonalDictionary;
217 return NS_OK;
220 NS_IMETHODIMP mozHunspell::GetDictionaryList(
221 nsTArray<nsCString>& aDictionaries) {
222 MOZ_ASSERT(aDictionaries.IsEmpty());
223 for (const auto& key : mDictionaries.Keys()) {
224 aDictionaries.AppendElement(NS_ConvertUTF16toUTF8(key));
227 return NS_OK;
230 void mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses) {
231 mDictionaries.Clear();
233 nsresult rv;
235 // find built in dictionaries, or dictionaries specified in
236 // spellchecker.dictionary_path in prefs
237 nsCOMPtr<nsIFile> dictDir;
239 // check preferences first
240 nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
241 if (prefs) {
242 nsAutoCString extDictPath;
243 rv = prefs->GetCharPref("spellchecker.dictionary_path", extDictPath);
244 if (NS_SUCCEEDED(rv)) {
245 // set the spellchecker.dictionary_path
246 rv = NS_NewNativeLocalFile(extDictPath, true, getter_AddRefs(dictDir));
248 if (dictDir) {
249 LoadDictionariesFromDir(dictDir);
253 // find dictionaries in DICPATH
254 char* dicEnv = PR_GetEnv("DICPATH");
255 if (dicEnv) {
256 // do a two-pass dance so dictionaries are loaded right-to-left as
257 // preference
258 nsTArray<nsCOMPtr<nsIFile>> dirs;
259 nsAutoCString env(dicEnv); // assume dicEnv is UTF-8
261 char* currPath = nullptr;
262 char* nextPaths = env.BeginWriting();
263 while ((currPath = NS_strtok(":", &nextPaths))) {
264 nsCOMPtr<nsIFile> dir;
265 rv =
266 NS_NewNativeLocalFile(nsCString(currPath), true, getter_AddRefs(dir));
267 if (NS_SUCCEEDED(rv)) {
268 dirs.AppendElement(dir);
272 // load them in reverse order so they override each other properly
273 for (int32_t i = dirs.Length() - 1; i >= 0; i--) {
274 LoadDictionariesFromDir(dirs[i]);
278 // find dictionaries from restartless extensions
279 for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) {
280 LoadDictionariesFromDir(mDynamicDirectories[i]);
283 for (const auto& dictionaryEntry : mDynamicDictionaries) {
284 mDictionaries.InsertOrUpdate(dictionaryEntry.GetKey(),
285 dictionaryEntry.GetData());
288 DictionariesChanged(aNotifyChildProcesses);
291 void mozHunspell::DictionariesChanged(bool aNotifyChildProcesses) {
292 // Now we have finished updating the list of dictionaries, update the current
293 // dictionary and any editors which may use it.
294 mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking();
296 if (aNotifyChildProcesses) {
297 mozilla::dom::ContentParent_NotifyUpdatedDictionaries();
300 // Check if the current dictionaries are still available.
301 // If not, try to replace it with other dictionaries of the same language.
302 if (!mHunspells.IsEmpty()) {
303 nsTArray<nsCString> dictionaries;
304 for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) {
305 if (iter.Data().mEnabled) {
306 dictionaries.AppendElement(iter.Key());
309 nsresult rv = SetDictionaries(dictionaries);
310 if (NS_SUCCEEDED(rv)) return;
313 // If the current dictionaries are gone, and we don't have a good replacement,
314 // set no current dictionary.
315 if (!mHunspells.IsEmpty()) {
316 nsTArray<nsCString> empty;
317 SetDictionaries(empty);
321 NS_IMETHODIMP
322 mozHunspell::LoadDictionariesFromDir(nsIFile* aDir) {
323 nsresult rv;
325 bool check = false;
326 rv = aDir->Exists(&check);
327 if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED;
329 rv = aDir->IsDirectory(&check);
330 if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED;
332 nsCOMPtr<nsIDirectoryEnumerator> files;
333 rv = aDir->GetDirectoryEntries(getter_AddRefs(files));
334 if (NS_FAILED(rv)) return NS_ERROR_UNEXPECTED;
336 nsCOMPtr<nsIFile> file;
337 while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) {
338 nsAutoString leafName;
339 file->GetLeafName(leafName);
340 if (!StringEndsWith(leafName, u".dic"_ns)) continue;
342 nsAutoString dict(leafName);
343 dict.SetLength(dict.Length() - 4); // magic length of ".dic"
345 // check for the presence of the .aff file
346 leafName = dict;
347 leafName.AppendLiteral(".aff");
348 file->SetLeafName(leafName);
349 rv = file->Exists(&check);
350 if (NS_FAILED(rv) || !check) continue;
352 // Replace '_' separator with '-'
353 dict.ReplaceChar('_', '-');
355 nsCOMPtr<nsIURI> uri;
356 rv = NS_NewFileURI(getter_AddRefs(uri), file);
357 NS_ENSURE_SUCCESS(rv, rv);
359 mDictionaries.InsertOrUpdate(dict, uri);
362 return NS_OK;
365 nsresult mozHunspell::DictionaryData::ConvertCharset(const nsAString& aStr,
366 std::string& aDst) {
367 if (NS_WARN_IF(!mEncoder)) {
368 return NS_ERROR_NOT_INITIALIZED;
371 auto src = Span(aStr.BeginReading(), aStr.Length());
372 CheckedInt<size_t> needed =
373 mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(src.Length());
374 if (!needed.isValid()) {
375 return NS_ERROR_OUT_OF_MEMORY;
378 aDst.resize(needed.value());
380 char* dstPtr = &aDst[0];
381 auto dst = Span(reinterpret_cast<uint8_t*>(dstPtr), needed.value());
383 uint32_t result;
384 size_t written;
385 std::tie(result, std::ignore, written) =
386 mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
387 MOZ_ASSERT(result != kOutputFull);
388 if (result != kInputEmpty) {
389 return NS_ERROR_UENC_NOMAPPING;
391 aDst.resize(written);
392 mEncoder->Encoding()->NewEncoderInto(*mEncoder);
393 return NS_OK;
396 nsresult mozHunspell::DictionaryData::LoadIfNecessary() {
397 if (mHunspell && mEncoder && mDecoder) {
398 return NS_OK;
401 if (mLoadFailed) {
402 return NS_ERROR_FAILURE;
405 nsCString dictFileName = mAffixFileName;
406 int32_t dotPos = dictFileName.RFindChar('.');
407 if (dotPos == -1) {
408 mLoadFailed = true;
409 return NS_ERROR_FAILURE;
411 dictFileName.SetLength(dotPos);
412 dictFileName.AppendLiteral(".dic");
414 UniquePtr<RLBoxHunspell> hunspell(
415 RLBoxHunspell::Create(mAffixFileName, dictFileName));
416 if (!hunspell) {
417 mLoadFailed = true;
418 // TODO Bug 1788857: Verify error propagation in case of inaccessible file
419 return NS_ERROR_OUT_OF_MEMORY;
421 mHunspell = std::move(hunspell);
422 auto encoding =
423 Encoding::ForLabelNoReplacement(mHunspell->get_dict_encoding());
424 if (!encoding) {
425 mLoadFailed = true;
426 return NS_ERROR_UCONV_NOCONV;
428 mEncoder = encoding->NewEncoder();
429 mDecoder = encoding->NewDecoderWithoutBOMHandling();
430 return NS_OK;
433 NS_IMETHODIMP
434 mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport,
435 nsISupports* aData, bool aAnonymize) {
436 MOZ_COLLECT_REPORT("explicit/spell-check", KIND_HEAP, UNITS_BYTES,
437 HunspellAllocator::MemoryAllocated(),
438 "Memory used by the spell-checking engine.");
440 return NS_OK;
443 NS_IMETHODIMP
444 mozHunspell::Check(const nsAString& aWord, bool* aResult) {
445 if (NS_WARN_IF(!aResult)) {
446 return NS_ERROR_INVALID_ARG;
449 if (NS_WARN_IF(mHunspells.IsEmpty())) {
450 return NS_ERROR_FAILURE;
453 *aResult = true;
454 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) {
455 if (!iter.Data().mEnabled) {
456 continue;
459 nsresult rv = iter.Data().LoadIfNecessary();
460 if (NS_FAILED(rv)) {
461 continue;
464 std::string charsetWord;
465 rv = iter.Data().ConvertCharset(aWord, charsetWord);
466 if (NS_FAILED(rv)) {
467 continue;
470 // Depending upon the encoding, we might end up with a string that begins
471 // with the null byte. Since the hunspell interface uses C-style strings,
472 // this appears like an empty string, and hunspell marks empty strings as
473 // spelled correctly. Skip these cases to allow another dictionary to have
474 // the chance to spellcheck them.
475 if (charsetWord.empty() || charsetWord[0] == 0) {
476 continue;
479 *aResult = iter.Data().mHunspell->spell(charsetWord);
480 if (*aResult) {
481 break;
485 if (!*aResult && mPersonalDictionary) {
486 return mPersonalDictionary->Check(aWord, aResult);
489 return NS_OK;
492 NS_IMETHODIMP
493 mozHunspell::Suggest(const nsAString& aWord, nsTArray<nsString>& aSuggestions) {
494 if (NS_WARN_IF(mHunspells.IsEmpty())) {
495 return NS_ERROR_FAILURE;
498 MOZ_ASSERT(aSuggestions.IsEmpty());
500 for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) {
501 if (!iter.Data().mEnabled) {
502 continue;
505 nsresult rv = iter.Data().LoadIfNecessary();
506 if (NS_FAILED(rv)) {
507 continue;
510 std::string charsetWord;
511 rv = iter.Data().ConvertCharset(aWord, charsetWord);
512 NS_ENSURE_SUCCESS(rv, rv);
514 std::vector<std::string> suggestions =
515 iter.Data().mHunspell->suggest(charsetWord);
516 if (!suggestions.empty()) {
517 aSuggestions.SetCapacity(aSuggestions.Length() + suggestions.size());
518 for (Span<const char> charSrc : suggestions) {
519 // Convert the suggestion to utf16
520 auto src = AsBytes(charSrc);
521 nsresult rv =
522 iter.Data().mDecoder->Encoding()->DecodeWithoutBOMHandling(
523 src, *aSuggestions.AppendElement());
524 NS_ENSURE_SUCCESS(rv, rv);
525 iter.Data().mDecoder->Encoding()->NewDecoderWithoutBOMHandlingInto(
526 *iter.Data().mDecoder);
531 return NS_OK;
534 NS_IMETHODIMP
535 mozHunspell::Observe(nsISupports* aSubj, const char* aTopic,
536 const char16_t* aData) {
537 NS_ASSERTION(!strcmp(aTopic, "profile-do-change") ||
538 !strcmp(aTopic, "profile-after-change"),
539 "Unexpected observer topic");
541 LoadDictionaryList(false);
543 return NS_OK;
546 NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile* aDir) {
547 mDynamicDirectories.AppendObject(aDir);
548 LoadDictionaryList(true);
549 return NS_OK;
552 NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile* aDir) {
553 mDynamicDirectories.RemoveObject(aDir);
554 LoadDictionaryList(true);
556 #ifdef MOZ_THUNDERBIRD
558 * This notification is needed for Thunderbird. Thunderbird derives the
559 * dictionary from the document's "lang" attribute. If a dictionary is
560 * removed, we need to change the "lang" attribute.
562 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
563 if (obs) {
564 obs->NotifyObservers(nullptr, SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION,
565 nullptr);
567 #endif
568 return NS_OK;
571 NS_IMETHODIMP mozHunspell::AddDictionary(const nsAString& aLang,
572 nsIURI* aFile) {
573 NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG);
575 mDynamicDictionaries.InsertOrUpdate(aLang, aFile);
576 mDictionaries.InsertOrUpdate(aLang, aFile);
577 DictionariesChanged(true);
578 return NS_OK;
581 NS_IMETHODIMP mozHunspell::RemoveDictionary(const nsAString& aLang,
582 nsIURI* aFile, bool* aRetVal) {
583 NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG);
584 *aRetVal = false;
586 nsCOMPtr<nsIURI> file = mDynamicDictionaries.Get(aLang);
587 bool equal;
588 if (file && NS_SUCCEEDED(file->Equals(aFile, &equal)) && equal) {
589 mDynamicDictionaries.Remove(aLang);
590 LoadDictionaryList(true);
591 *aRetVal = true;
593 return NS_OK;