1 from __future__
import absolute_import
, division
, print_function
, unicode_literals
8 2012 Peter Norvig (http://norvig.com/spell-correct.html)
9 2013-2018 David Aguilar <davvid@gmail.com>
# Lowercase ASCII letters tried, one at a time, as replacement and insertion
# characters when candidate edits of a word are generated.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
15 def train(features
, model
):
22 splits
= [(word
[:i
], word
[i
:]) for i
in range(len(word
) + 1)]
23 deletes
= [a
+ b
[1:] for a
, b
in splits
if b
]
24 transposes
= [a
+ b
[1] + b
[0] + b
[2:] for a
, b
in splits
if len(b
) > 1]
25 replaces
= [a
+ c
+ b
[1:] for a
, b
in splits
for c
in alphabet
if b
]
26 inserts
= [a
+ c
+ b
for a
, b
in splits
for c
in alphabet
]
27 return set(deletes
+ transposes
+ replaces
+ inserts
)
def known_edits2(word, words):
    """Return the known words that are two edits away from ``word``.

    Every variant of ``word`` produced by ``edits1`` is passed through
    ``edits1`` a second time, and only the results that are members of
    ``words`` are kept.

    :param word: the string to generate candidates for.
    :param words: container of known words; only ``in`` is used on it.
    :returns: a ``set`` of candidates, empty when none of them is known.
    """
    return {
        second
        for first in edits1(word)
        for second in edits1(first)
        if second in words
    }
def known(word, words):
    """Return the candidates from ``word`` that are members of ``words``.

    Despite its singular name, ``word`` is iterated: callers in this module
    pass a collection of candidate strings (for example the set returned by
    ``edits1``).  A bare string would be walked character by character.

    :param word: iterable of candidate strings.
    :param words: container of known words; only ``in`` is used on it, so a
        ``defaultdict`` is not populated by the lookups.
    :returns: a ``set`` with the candidates found in ``words``.
    """
    return {candidate for candidate in word if candidate in words}
38 def suggest(word
, words
):
41 or known(edits1(word
), words
)
42 or known_edits2(word
, words
)
def correct(word, words):
    """Return the highest-ranked suggestion for ``word``.

    The candidates come from ``suggest(word, words)``; each one is ranked by
    the value ``words.get`` returns for it and the largest wins.

    :param word: the string to correct.
    :param words: mapping of known words; its ``get`` method supplies the
        ranking key.
    :returns: the candidate with the greatest ``words.get`` value.
    """
    return max(suggest(word, words), key=words.get)
53 class NorvigSpellCheck(object):
56 words
='/usr/share/dict/words',
57 cracklib
='/usr/share/dict/cracklib-small',
58 propernames
='/usr/share/dict/propernames',
60 self
.dictwords
= words
61 self
.cracklib
= cracklib
62 self
.propernames
= propernames
63 self
.words
= collections
.defaultdict(lambda: 1)
64 self
.extra_words
= set()
65 self
.dictionary
= None
66 self
.initialized
= False
def set_dictionary(self, dictionary):
    """Store ``dictionary`` as this checker's configured dictionary path.

    The value is only recorded on ``self.dictionary``; nothing is read from
    disk here.
    """
    self.dictionary = dictionary
74 self
.initialized
= True
75 train(self
.read(), self
.words
)
76 train(self
.extra_words
, self
.words
)
def add_word(self, word):
    """Add ``word`` to the set of extra words kept on this checker.

    ``self.extra_words`` is a set, so adding the same word again has no
    further effect.
    """
    self.extra_words.add(word)
81 def suggest(self
, word
):
83 return suggest(word
, self
.words
)
85 def check(self
, word
):
87 return word
.replace('.', '') in self
.words
90 """Read dictionary words"""
93 words
= self
.dictwords
94 cracklib
= self
.cracklib
95 propernames
= self
.propernames
96 cfg_dictionary
= self
.dictionary
98 if cracklib
and os
.path
.exists(cracklib
):
99 paths
.append((cracklib
, True))
100 elif words
and os
.path
.exists(words
):
101 paths
.append((words
, True))
103 if propernames
and os
.path
.exists(propernames
):
104 paths
.append((propernames
, False))
106 if cfg_dictionary
and os
.path
.exists(cfg_dictionary
):
107 paths
.append((cfg_dictionary
, False))
109 for (path
, title
) in paths
:
111 with
open(path
, 'r') as f
:
113 word
= core
.decode(word
.rstrip())