4712 Prefer 'parsable' over 'parseable' in the manual pages
[unleashed.git] / usr / src / tools / onbld / Checks / SpellCheck.py
blob8923b418185adbc7f8ec1c2d24450fde11b1b936
2 # CDDL HEADER START
4 # The contents of this file are subject to the terms of the
5 # Common Development and Distribution License (the "License").
6 # You may not use this file except in compliance with the License.
8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 # or http://www.opensolaris.org/os/licensing.
10 # See the License for the specific language governing permissions
11 # and limitations under the License.
13 # When distributing Covered Code, include this CDDL HEADER in each
14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 # If applicable, add the following below this CDDL HEADER, with the
16 # fields enclosed by brackets "[]" replaced with your own identifying
17 # information: Portions Copyright [yyyy] [name of copyright owner]
19 # CDDL HEADER END
23 # Copyright 2016 Joyent, Inc.
26 import re, sys
# Diagnostic templates. Each is %-formatted with the tuple
# (filename, line number, flagged text, suggested replacement).

# Used for entries in `misspellings`.
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'

# Used for entries in `alternates`: valid spellings that the documentation
# avoids for the sake of consistency.  (Previously read "Lined %d".)
altMsg = ('%s: Line %d contains "%s"; please use "%s" instead '
          'for consistency with other documentation\n')
# Common misspellings and their corrections.
#
# Each key is a misspelled word (or a doubled-word phrase such as
# 'the the') that is compiled into a case-insensitive, word-bounded regular
# expression further down in this file.  Each value is the text shown to the
# user as the suggested correction; where several corrections are plausible
# they are given as one comma-separated string.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}
# Spellings that are not wrong but that the documentation avoids in favour
# of one consistent form.  Keys are the discouraged spelling, values are the
# preferred spelling reported to the user.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}
def _compile_table(table):
    """Turn a {flagged: replacement} mapping into a list of matcher entries.

    Each entry is a tuple (regex, flagged, replacement) where `regex` matches
    `flagged` case-insensitively as a whole word or phrase (bounded by \\b on
    both sides).

    The key is passed through re.escape() so that punctuation in a key is
    always matched literally, and .items() is used instead of .iteritems() so
    that the module loads under both Python 2 and Python 3.
    """
    return [
        (re.compile(r'\b%s\b' % re.escape(flagged), re.IGNORECASE),
         flagged, replacement)
        for flagged, replacement in table.items()
    ]


# Pre-compiled at import time so that spellcheck() does not rebuild the
# patterns for every line it scans.
misspellingREs = _compile_table(misspellings)
alternateREs = _compile_table(alternates)
def check(errmsg, output, filename, line, lineno, entry):
    """Test one line of text against one matcher entry.

    `entry` is indexed as (compiled regex, flagged text, replacement text).
    When the regex finds a match anywhere in `line`, `errmsg` is %-formatted
    with (filename, lineno, flagged text, replacement text) and written to
    `output`.

    Returns 1 if a message was written, 0 otherwise.
    """
    pattern = entry[0]
    if not pattern.search(line):
        return 0

    flagged, replacement = entry[1], entry[2]
    output.write(errmsg % (filename, lineno, flagged, replacement))
    return 1
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan every line of `fh` for known misspellings and discouraged forms.

    fh        -- file-like object; it is rewound with seek(0) before reading.
    filename  -- name used in diagnostics; falls back to fh.name when not
                 given (or otherwise false).
    output    -- stream that diagnostics are written to.
    opts      -- accepted but not used by this function.

    Every line is tested first against all of misspellingREs (reported with
    spellMsg) and then against all of alternateREs (reported with altMsg).
    Line numbers in diagnostics start at 1.

    Returns 1 if at least one diagnostic was written, 0 otherwise.
    """
    if not filename:
        filename = fh.name

    # Message template paired with the table it applies to, in report order.
    passes = (
        (spellMsg, misspellingREs),
        (altMsg, alternateREs),
    )

    status = 0
    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for errmsg, table in passes:
            for entry in table:
                status |= check(errmsg, output, filename, line,
                                lineno, entry)

    return status