5 from . import resources
8 2012 Peter Norvig (http://norvig.com/spell-correct.html)
9 2013-2018 David Aguilar <davvid@gmail.com>
12 ALPHABET
= 'abcdefghijklmnopqrstuvwxyz'
15 def train(features
, model
):
22 splits
= [(word
[:i
], word
[i
:]) for i
in range(len(word
) + 1)]
23 deletes
= [a
+ b
[1:] for a
, b
in splits
if b
]
24 transposes
= [a
+ b
[1] + b
[0] + b
[2:] for a
, b
in splits
if len(b
) > 1]
25 replaces
= [a
+ c
+ b
[1:] for a
, b
in splits
for c
in ALPHABET
if b
]
26 inserts
= [a
+ c
+ b
for a
, b
in splits
for c
in ALPHABET
]
27 return set(deletes
+ transposes
+ replaces
+ inserts
)
def known_edits2(word, words):
    """Return the members of ``words`` reachable from ``word`` in two edits.

    Each string produced by ``edits1(word)`` is expanded with ``edits1``
    a second time; only results for which ``result in words`` holds are
    kept. The result is a set, so duplicates are collapsed.
    """
    matches = set()
    for first_edit in edits1(word):
        for second_edit in edits1(first_edit):
            if second_edit in words:
                matches.add(second_edit)
    return matches
def known(word, words):
    """Return the set of items from ``word`` that are present in ``words``.

    Despite its singular name, ``word`` is iterated over: every item it
    yields is kept when ``item in words`` is true.
    """
    found = set()
    for candidate in word:
        if candidate in words:
            found.add(candidate)
    return found
38 def suggest(word
, words
):
41 or known(edits1(word
), words
)
42 or known_edits2(word
, words
)
def correct(word, words):
    """Return the highest-ranked suggestion for ``word``.

    Candidates come from ``suggest(word, words)`` and are ranked by the
    value that ``words.get`` returns for each of them; the candidate with
    the largest value is returned.
    """
    return max(
        suggest(word, words),
        key=lambda candidate: words.get(candidate),
    )
53 class NorvigSpellCheck
:
57 propernames
='dict/propernames',
59 data_dirs
= resources
.xdg_data_dirs()
60 self
.dictwords
= resources
.find_first(words
, data_dirs
)
61 self
.propernames
= resources
.find_first(propernames
, data_dirs
)
62 self
.words
= collections
.defaultdict(lambda: 1)
63 self
.extra_words
= set()
64 self
.dictionary
= None
65 self
.initialized
= False
67 def set_dictionary(self
, dictionary
):
68 self
.dictionary
= dictionary
73 self
.initialized
= True
74 train(self
.read(), self
.words
)
75 train(self
.extra_words
, self
.words
)
def add_word(self, word):
    """Add ``word`` to the ``extra_words`` set held on this instance."""
    extras = self.extra_words
    extras.add(word)
80 def suggest(self
, word
):
82 return suggest(word
, self
.words
)
84 def check(self
, word
):
86 return word
.replace('.', '') in self
.words
89 """Read dictionary words"""
92 words
= self
.dictwords
93 propernames
= self
.propernames
94 cfg_dictionary
= self
.dictionary
96 if words
and os
.path
.exists(words
):
97 paths
.append((words
, True))
99 if propernames
and os
.path
.exists(propernames
):
100 paths
.append((propernames
, False))
102 if cfg_dictionary
and os
.path
.exists(cfg_dictionary
):
103 paths
.append((cfg_dictionary
, False))
105 for path
, title
in paths
:
108 path
, 'r', encoding
='utf-8', errors
='ignore'
110 for line
in words_file
: