1 from __future__
import absolute_import
, division
, print_function
, unicode_literals
6 from . import resources
9 2012 Peter Norvig (http://norvig.com/spell-correct.html)
10 2013-2018 David Aguilar <davvid@gmail.com>
13 alphabet
= 'abcdefghijklmnopqrstuvwxyz'
# train(features, model) is called further down with an iterable of words as
# `features` and the spell checker's word mapping as `model`.
# NOTE(review): presumably tallies every feature in `model` -- confirm against the body.
16 def train(features
, model
):
# NOTE(review): these statements look like the body of edits1(word), the helper
# that known_edits2() and suggest() call -- confirm against the def line.
# They build every string that is one simple edit away from `word`, using only
# the lowercase letters in `alphabet`.

# Every way of cutting `word` into a (prefix, suffix) pair, including the empty
# prefix and the empty suffix.
23 splits
= [(word
[:i
], word
[i
:]) for i
in range(len(word
) + 1)]
# Deletions: drop the first character of each non-empty suffix.
24 deletes
= [a
+ b
[1:] for a
, b
in splits
if b
]
# Transpositions: swap the first two characters of each suffix that has at least two.
25 transposes
= [a
+ b
[1] + b
[0] + b
[2:] for a
, b
in splits
if len(b
) > 1]
# Replacements: substitute the first character of each non-empty suffix with
# every letter of `alphabet` (this also regenerates `word` itself when the
# replaced character is one of those letters).
26 replaces
= [a
+ c
+ b
[1:] for a
, b
in splits
for c
in alphabet
if b
]
# Insertions: put every letter of `alphabet` at each split point.
27 inserts
= [a
+ c
+ b
for a
, b
in splits
for c
in alphabet
]
# Concatenate the four candidate lists and de-duplicate them.
28 return set(deletes
+ transposes
+ replaces
+ inserts
)
def known_edits2(word, words):
    """Return the known words that are two edits away from ``word``.

    Every result of ``edits1(word)`` is expanded with ``edits1()`` again, and
    only the second-generation strings that are present in ``words`` are kept.

    ``words`` may be any container supporting ``in``.  The result is a set,
    which is empty when nothing matches.
    """
    # A set comprehension replaces set(<generator>): same result, no
    # intermediate generator object.
    return {
        second
        for first in edits1(word)
        for second in edits1(first)
        if second in words
    }
def known(word, words):
    """Return the members of ``word`` that are present in ``words``.

    Despite its singular name, ``word`` is an iterable of candidate words
    (for example the set produced by ``edits1()``).  Passing a plain string
    would iterate over its characters instead.

    ``words`` may be any container supporting ``in``.  The result is a set,
    which is empty when no candidate is known.
    """
    # A set comprehension replaces set(<generator>): same result, no
    # intermediate generator object.
    return {candidate for candidate in word if candidate in words}
# suggest(word, words): the visible part of the candidate chain falls back, via
# `or`, to the known words among edits1(word) and then to the result of
# known_edits2(word, words); an empty set is falsy, so the next alternative is
# tried.
# NOTE(review): the first alternative of the chain and the return statement are
# not shown here -- confirm them before changing the order.
39 def suggest(word
, words
):
42 or known(edits1(word
), words
)
43 or known_edits2(word
, words
)
def correct(word, words):
    """Return the suggestion for ``word`` that ranks highest in ``words``.

    The candidates come from ``suggest(word, words)`` and are ranked with
    ``words.get`` as the key, so ``words`` must offer a dict-style ``get``.
    ``max()`` raises ``ValueError`` if there are no candidates at all.
    """
    return max(suggest(word, words), key=words.get)
# Spell checker state: the located word-list paths, the word model, the extra
# words and the initialization flag are all set up in the constructor below.
54 class NorvigSpellCheck(object):
58 propernames
='dict/propernames',
# Look up both word lists in the directories returned by
# resources.xdg_data_dirs().
# NOTE(review): presumably find_first() yields a path or a false value, since
# the paths are later tested for truthiness and existence -- verify in resources.
60 data_dirs
= resources
.xdg_data_dirs()
61 self
.dictwords
= resources
.find_first(words
, data_dirs
)
62 self
.propernames
= resources
.find_first(propernames
, data_dirs
)
# Word model: reading a missing key through [] creates it with the value 1.
63 self
.words
= collections
.defaultdict(lambda: 1)
# Words registered through add_word().
64 self
.extra_words
= set()
# Path stored by set_dictionary(); None until then.
65 self
.dictionary
= None
# Becomes True right before the word lists are read and fed to train().
66 self
.initialized
= False
def set_dictionary(self, dictionary):
    """Store ``dictionary`` as this checker's configured dictionary.

    The value is only kept in ``self.dictionary``; nothing is read here.
    """
    self.dictionary = dictionary
# Flag the checker as initialized before any word list is loaded.
74 self
.initialized
= True
# Feed everything produced by read() into the word model.
75 train(self
.read(), self
.words
)
# Then feed in the words registered through add_word().
76 train(self
.extra_words
, self
.words
)
def add_word(self, word):
    """Register ``word`` in this checker's set of extra words."""
    extras = self.extra_words
    extras.add(word)
# Return the candidates for `word` by delegating to the module-level suggest()
# with this checker's word model (self.words).
81 def suggest(self
, word
):
83 return suggest(word
, self
.words
)
# Return True when `word`, with every '.' removed, is a key of self.words.
85 def check(self
, word
):
87 return word
.replace('.', '') in self
.words
90 """Read dictionary words"""
# Candidate sources: the two word-list paths located by the constructor and
# the path stored by set_dictionary().
93 words
= self
.dictwords
94 propernames
= self
.propernames
95 cfg_dictionary
= self
.dictionary
# Only paths that are set and exist on disk are queued.  Each one is paired
# with a flag -- True for the main word list, False for the others -- which the
# loop below unpacks as `title`.
97 if words
and os
.path
.exists(words
):
98 paths
.append((words
, True))
100 if propernames
and os
.path
.exists(propernames
):
101 paths
.append((propernames
, False))
103 if cfg_dictionary
and os
.path
.exists(cfg_dictionary
):
104 paths
.append((cfg_dictionary
, False))
# Visit every queued (path, title) pair.
106 for (path
, title
) in paths
:
109 path
, 'r', encoding
='utf-8', errors
='ignore'
# The file is read as UTF-8 text with errors='ignore', one line at a time.
111 for line
in words_file
:
# NOTE(review): the handler body is not shown here -- confirm how I/O errors
# on a word list are dealt with.
116 except (IOError, OSError):