doc: update v3.3 release notes draft
[git-cola.git] / cola / spellcheck.py
blob d513c9a08fb91ee85e6337bbe07e007bc3eb9d8f
1 from __future__ import division, absolute_import, unicode_literals
2 import collections
3 import os
5 from cola import core
7 __copyright__ = """
8 2012 Peter Norvig (http://norvig.com/spell-correct.html)
9 2013-2018 David Aguilar <davvid@gmail.com>
10 """
# Letters tried by edits1() when generating replacement and insertion edits.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
def train(features, model):
    """Bump the count stored in `model` once for every item of `features`.

    `model` is updated in place and also returned.  The lookup of a missing
    key is left to `model` itself (a defaultdict supplies its default, a
    plain dict raises KeyError).
    """
    for feature in features:
        model[feature] = model[feature] + 1
    return model
def edits1(word):
    """Return the set of strings exactly one simple edit away from `word`.

    The edits are: deleting one character, swapping two adjacent characters,
    replacing one character with a letter of `alphabet`, and inserting a
    letter of `alphabet` at any position.
    """
    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        if tail:
            # Deletion: drop the first character of the tail.
            variants.add(head + tail[1:])
            # Replacement: overwrite the first character of the tail.
            for letter in alphabet:
                variants.add(head + letter + tail[1:])
        if len(tail) > 1:
            # Transposition: swap the first two characters of the tail.
            variants.add(head + tail[1] + tail[0] + tail[2:])
        # Insertion: possible at every split point, including the end.
        for letter in alphabet:
            variants.add(head + letter + tail)
    return variants
def known_edits2(word, words):
    """Return the strings two edits away from `word` that appear in `words`.

    Every result of edits1(word) is expanded with edits1() again and only the
    candidates contained in `words` are kept.
    """
    matches = set()
    for first_pass in edits1(word):
        for second_pass in edits1(first_pass):
            if second_pass in words:
                matches.add(second_pass)
    return matches
def known(word, words):
    """Return the subset of candidates that are present in `words`.

    Despite its name, `word` is an iterable of candidate strings; each one is
    tested for membership in `words` and the matches are returned as a set.
    """
    return {candidate for candidate in word if candidate in words}
def suggest(word, words):
    """Return spelling candidates for `word` drawn from `words`.

    The first non-empty result wins, tried in this order: the word itself,
    known words one edit away, known words two edits away.  When nothing
    matches, the fallback is the one-element list `[word]` (the other cases
    return a set).
    """
    exact = known([word], words)
    if exact:
        return exact

    one_edit = known(edits1(word), words)
    if one_edit:
        return one_edit

    two_edits = known_edits2(word, words)
    if two_edits:
        return two_edits

    return [word]
def correct(word, words):
    """Return the suggestion for `word` with the highest count in `words`.

    Candidates come from suggest(); they are ranked by `words.get`.
    """
    return max(suggest(word, words), key=words.get)
class NorvigSpellCheck(object):
    """Spell checker backed by word lists read from disk.

    The word lists are loaded lazily: nothing is read until init() runs,
    which happens on the first call to suggest() or check().
    """

    def __init__(self, words='/usr/share/dict/words',
                 cracklib='/usr/share/dict/cracklib-small',
                 propernames='/usr/share/dict/propernames'):
        """Remember the word-list paths; no file is opened here.

        Any of the paths may be falsy or point to a missing file, in which
        case read() skips it.
        """
        self.dictwords = words
        self.cracklib = cracklib
        self.propernames = propernames
        # Counts per word.  A missing key defaults to 1, so the first
        # increment of a new word leaves it at 2.
        self.words = collections.defaultdict(lambda: 1)
        # Words registered through add_word().
        self.extra_words = set()
        # Optional extra word-list path, see set_dictionary().
        self.dictionary = None
        self.initialized = False

    def set_dictionary(self, dictionary):
        """Set the path of an additional word list.

        The path is only consulted by read(), which init() runs once, so it
        has to be set before the first suggest()/check() call to take effect.
        """
        self.dictionary = dictionary

    def init(self):
        """Load the word lists and the extra words on first use.

        Subsequent calls return immediately.
        """
        if self.initialized:
            return
        self.initialized = True
        train(self.read(), self.words)
        train(self.extra_words, self.words)

    def add_word(self, word):
        """Register `word` as correctly spelled.

        Before init() has run the word is only queued in `extra_words` and is
        merged by init().  Once the checker is initialized the queue is never
        read again, so a newly added word is folded into `words` right away;
        otherwise it would silently never be recognised.
        """
        is_new = word not in self.extra_words
        self.extra_words.add(word)
        if self.initialized and is_new:
            # Same increment train() applies, done once per distinct word.
            self.words[word] += 1

    def suggest(self, word):
        """Return spelling candidates for `word` from the loaded words."""
        self.init()
        return suggest(word, self.words)

    def check(self, word):
        """Return True when `word`, with every '.' removed, is a known word."""
        self.init()
        return word.replace('.', '') in self.words

    def read(self):
        """Read dictionary words

        Generator yielding every word of the available word lists.  The
        cracklib list is preferred over the generic words list and only one
        of the two is used.  The propernames list and the configured
        dictionary are appended when they exist.
        """
        paths = []

        words = self.dictwords
        cracklib = self.cracklib
        propernames = self.propernames
        cfg_dictionary = self.dictionary

        # Each entry is (path, title): when `title` is true the title-cased
        # form of every word is yielded in addition to the word itself.
        if cracklib and os.path.exists(cracklib):
            paths.append((cracklib, True))
        elif words and os.path.exists(words):
            paths.append((words, True))

        if propernames and os.path.exists(propernames):
            paths.append((propernames, False))

        if cfg_dictionary and os.path.exists(cfg_dictionary):
            paths.append((cfg_dictionary, False))

        for (path, title) in paths:
            try:
                with open(path, 'r') as f:
                    for word in f:
                        word = core.decode(word.rstrip())
                        yield word
                        if title:
                            yield word.title()
            except IOError:
                # Best effort: a word list that cannot be read is skipped.
                pass