1 from __future__
import division
, absolute_import
, unicode_literals
8 2012 Peter Norvig (http://norvig.com/spell-correct.html)
9 2013-2018 David Aguilar <davvid@gmail.com>
# Letters tried by the replacement and insertion edits.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
15 def train(features
, model
):
def edits1(word, letters=None):
    """Return the set of strings that are one edit away from ``word``.

    An edit is one of: deleting a character, swapping two adjacent
    characters, replacing a character, or inserting a character.

    :param word: the string to edit.
    :param letters: characters used for replacements and insertions;
        defaults to the module-level ``alphabet``.
    :returns: a ``set`` of edited strings.  It can contain ``word`` itself,
        because replacing a character with the same character is one of
        the generated edits.
    """
    if letters is None:
        letters = alphabet

    # Every (head, tail) cut of the word, including the two cuts that leave
    # the head or the tail empty.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]

    edits = set()
    for head, tail in splits:
        if tail:
            # Drop the first character of the tail.
            edits.add(head + tail[1:])
            # Substitute the first character of the tail.
            for letter in letters:
                edits.add(head + letter + tail[1:])
        if len(tail) > 1:
            # Swap the first two characters of the tail.
            edits.add(head + tail[1] + tail[0] + tail[2:])
        # Insert a character at the cut, even when the tail is empty.
        for letter in letters:
            edits.add(head + letter + tail)
    return edits
def known_edits2(word, words):
    """Return the strings two edits away from ``word`` that are in ``words``.

    Each result of ``edits1(word)`` is edited once more, and only the
    second-generation strings found in ``words`` are kept.
    """
    matches = set()
    for once_edited in edits1(word):
        for twice_edited in edits1(once_edited):
            if twice_edited in words:
                matches.add(twice_edited)
    return matches
def known(word, words):
    """Return the set of items from ``word`` that are present in ``words``.

    Despite its name, ``word`` is iterated: it is a collection of candidate
    strings.  Membership is tested with ``in``, so ``words`` can be any
    container.
    """
    return {candidate for candidate in word if candidate in words}
def suggest(word, words):
    """Return the candidate corrections for ``word``.

    The stages are tried in order and the first non-empty result wins:
    ``word`` itself if it is known, then known strings one edit away, then
    known strings two edits away.  The ``or`` chain is lazy, so the
    two-edit search only runs when the earlier stages find nothing.

    :param word: the string to look up.
    :param words: container of known words, tested with ``in``.
    :returns: a ``set`` of known words, or the list ``[word]`` when no
        stage finds a match.
    """
    candidates = (
        known([word], words)
        or known(edits1(word), words)
        or known_edits2(word, words)
        or [word]
    )
    # Callers use the result, so it has to be handed back.
    return candidates
def correct(word, words):
    """Return the suggestion for ``word`` that ranks highest in ``words``.

    Candidates come from ``suggest`` and are ranked by ``words.get``, so
    ``words`` must be a mapping.
    """
    ranked_by = words.get
    return max(suggest(word, words), key=ranked_by)
51 class NorvigSpellCheck(object):
53 def __init__(self
, words
='/usr/share/dict/words',
54 cracklib
='/usr/share/dict/cracklib-small',
55 propernames
='/usr/share/dict/propernames'):
56 self
.dictwords
= words
57 self
.cracklib
= cracklib
58 self
.propernames
= propernames
59 self
.words
= collections
.defaultdict(lambda: 1)
60 self
.extra_words
= set()
61 self
.dictionary
= None
62 self
.initialized
= False
64 def set_dictionary(self
, dictionary
):
65 self
.dictionary
= dictionary
70 self
.initialized
= True
71 train(self
.read(), self
.words
)
72 train(self
.extra_words
, self
.words
)
74 def add_word(self
, word
):
75 self
.extra_words
.add(word
)
def suggest(self, word):
    """Return the candidate corrections for ``word`` from ``self.words``.

    Delegates to the module-level ``suggest`` function.
    """
    # NOTE(review): nothing in this method populates self.words -- confirm
    # the dictionaries are loaded before this is called.
    known_words = self.words
    return suggest(word, known_words)
81 def check(self
, word
):
83 return word
.replace('.', '') in self
.words
86 """Read dictionary words"""
89 words
= self
.dictwords
90 cracklib
= self
.cracklib
91 propernames
= self
.propernames
92 cfg_dictionary
= self
.dictionary
94 if cracklib
and os
.path
.exists(cracklib
):
95 paths
.append((cracklib
, True))
96 elif words
and os
.path
.exists(words
):
97 paths
.append((words
, True))
99 if propernames
and os
.path
.exists(propernames
):
100 paths
.append((propernames
, False))
102 if cfg_dictionary
and os
.path
.exists(cfg_dictionary
):
103 paths
.append((cfg_dictionary
, False))
105 for (path
, title
) in paths
:
107 with
open(path
, 'r') as f
:
109 word
= core
.decode(word
.rstrip())