browse: display errors when saving blobs
[git-cola.git] / cola / spellcheck.py
blob2c4571ab05620d300bd7010b15aec55be16ee507
1 from __future__ import absolute_import, division, print_function, unicode_literals
2 import codecs
3 import collections
4 import os
6 from . import resources
__copyright__ = """
2012 Peter Norvig (http://norvig.com/spell-correct.html)
2013-2018 David Aguilar <davvid@gmail.com>
"""

# Letters that edits1() substitutes and inserts when generating candidates.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
def train(features, model):
    """Count every item of *features* into *model* and return *model*.

    *model* is updated in place.  Each item is looked up before it is
    incremented, so the mapping must be able to produce a value for keys it
    has not seen yet (for example a ``collections.defaultdict``).
    """
    for feature in features:
        model[feature] = model[feature] + 1
    return model
def edits1(word):
    """Return the set of strings that are one edit away from *word*.

    An edit is one of: deleting a character, swapping two adjacent
    characters, replacing a character with a letter from ``alphabet``, or
    inserting a letter from ``alphabet`` at any position.
    """
    candidates = set()
    for idx in range(len(word) + 1):
        head, tail = word[:idx], word[idx:]
        if tail:
            # Delete the first character of the tail.
            candidates.add(head + tail[1:])
        if len(tail) > 1:
            # Transpose the first two characters of the tail.
            candidates.add(head + tail[1] + tail[0] + tail[2:])
        for letter in alphabet:
            if tail:
                # Replace the first character of the tail.
                candidates.add(head + letter + tail[1:])
            # Insert a letter between head and tail.
            candidates.add(head + letter + tail)
    return candidates
def known_edits2(word, words):
    """Return the strings two edits away from *word* that are in *words*."""
    matches = set()
    for first_edit in edits1(word):
        for second_edit in edits1(first_edit):
            if second_edit in words:
                matches.add(second_edit)
    return matches
def known(word, words):
    """Return the set of candidates that are present in *words*.

    Despite the singular name, *word* is an iterable of candidate strings;
    each one is tested for membership in *words*.
    """
    found = set()
    for candidate in word:
        if candidate in words:
            found.add(candidate)
    return found
def suggest(word, words):
    """Return spelling candidates for *word* drawn from *words*.

    The first non-empty result wins, tried in this order: the word itself
    if known, known words one edit away, known words two edits away.  When
    none of those match, a one-element list holding *word* is returned.
    """
    exact = known([word], words)
    if exact:
        return exact

    one_edit = known(edits1(word), words)
    if one_edit:
        return one_edit

    two_edits = known_edits2(word, words)
    if two_edits:
        return two_edits

    return [word]
def correct(word, words):
    """Return the candidate from suggest() that ranks highest by words.get."""
    rank = words.get
    options = suggest(word, words)
    return max(options, key=rank)
class NorvigSpellCheck(object):
    """Spell checker backed by word lists found in the data directories.

    The word lists are loaded lazily: nothing is read until init() runs,
    which happens on the first call to suggest() or check().
    """

    def __init__(
        self,
        words='dict/words',
        propernames='dict/propernames',
    ):
        """Locate the word lists; reading them is deferred to init().

        *words* and *propernames* are resolved with resources.find_first()
        against the directories from resources.xdg_data_dirs().
        """
        data_dirs = resources.xdg_data_dirs()
        self.dictwords = resources.find_first(words, data_dirs)
        self.propernames = resources.find_first(propernames, data_dirs)
        # Missing keys default to 1; membership tests do not insert keys.
        self.words = collections.defaultdict(lambda: 1)
        self.extra_words = set()
        self.dictionary = None
        self.initialized = False

    def set_dictionary(self, dictionary):
        """Set the path of an additional word list for read() to load."""
        self.dictionary = dictionary

    def init(self):
        """Load the word lists and extra words into the model, once."""
        if self.initialized:
            return
        self.initialized = True
        train(self.read(), self.words)
        train(self.extra_words, self.words)

    def add_word(self, word):
        """Register an extra word so that check() accepts it.

        Words added before init() are folded into the model by init().
        Words added afterwards are counted immediately, because init() is
        a no-op once it has run and would otherwise never see them.
        """
        if self.initialized and word not in self.extra_words:
            self.words[word] += 1
        self.extra_words.add(word)

    def suggest(self, word):
        """Return spelling candidates for *word* from the loaded model."""
        self.init()
        return suggest(word, self.words)

    def check(self, word):
        """Return True when *word*, with any '.' removed, is a known word."""
        self.init()
        return word.replace('.', '') in self.words

    def read(self):
        """Read dictionary words"""
        # Each entry is (path, title); title=True additionally yields the
        # title-cased form of every word read from that file.
        paths = []

        words = self.dictwords
        propernames = self.propernames
        cfg_dictionary = self.dictionary

        if words and os.path.exists(words):
            paths.append((words, True))

        if propernames and os.path.exists(propernames):
            paths.append((propernames, False))

        if cfg_dictionary and os.path.exists(cfg_dictionary):
            paths.append((cfg_dictionary, False))

        for (path, title) in paths:
            try:
                with codecs.open(
                    path, 'r', encoding='utf-8', errors='ignore'
                ) as words_file:
                    for line in words_file:
                        word = line.rstrip()
                        yield word
                        if title:
                            yield word.title()
            except (IOError, OSError):
                # Best effort: an unreadable word list is skipped so the
                # remaining lists can still be loaded.
                pass