Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / progmodes / cc-guess.el
blob8a379d429bdec404fc5ed9d3745bb5ee7afd96b4
1 ;;; cc-guess.el --- guess indentation values by scanning existing code
3 ;; Copyright (C) 1985, 1987, 1992-2006, 2011-2014 Free Software
4 ;; Foundation, Inc.
6 ;; Author: 1994-1995 Barry A. Warsaw
7 ;; 2011- Masatake YAMATO
8 ;; Maintainer: bug-cc-mode@gnu.org
9 ;; Created: August 1994, split from cc-mode.el
10 ;; Version: See cc-mode.el
11 ;; Keywords: c languages oop
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;;; Commentary:
30 ;; This file contains routines that help guess the cc-mode style in a
31 ;; particular region/buffer. Here style means `c-offsets-alist' and
32 ;; `c-basic-offset'.
34 ;; The main entry point of this program is `c-guess' command but there
35 ;; are some variants.
37 ;; Suppose the major mode for the current buffer is one of the modes
38 ;; provided by cc-mode. `c-guess' guesses the indentation style by
39 ;; examining the indentation in the region between beginning of buffer
40 ;; and `c-guess-region-max'.
42 ;; and installs the guessed style. The name for installed style is given
43 ;; by `c-guess-style-name'.
45 ;; `c-guess-buffer' does the same but in the whole buffer.
46 ;; `c-guess-region' does the same but in the region between the point
47 ;; and the mark. `c-guess-no-install', `c-guess-buffer-no-install'
48 ;; and `c-guess-region-no-install' guess the indentation style but
49 ;; don't install it. You can review a guessed style with `c-guess-view'.
50 ;; After reviewing, use `c-guess-install' to install the style
51 ;; if you prefer it.
53 ;; If you want to reuse the guessed style in another buffer,
54 ;; run `c-set-style' command with the name of the guessed style:
55 ;; "*c-guess*:<name-of-file-which-examined-when-guessing>".
56 ;; Once the guessed style is installed explicitly with `c-guess-install'
57 ;; or implicitly with `c-guess', `c-guess-buffer', or `c-guess-region',
58 ;; a style name is given by `c-guess-style-name' with the above form.
60 ;; If you want to reuse the guessed style in future emacs sessions,
61 ;; you may want to put it to your .emacs. `c-guess-view' is for
62 ;; you. It emits emacs lisp code which defines the last guessed
63 ;; style, in a temporary buffer. You can put the emitted code into
64 ;; your .emacs. This command was suggested by Alan Mackenzie.
66 ;;; Code:
68 (eval-when-compile
69 (let ((load-path
70 (if (and (boundp 'byte-compile-dest-file)
71 (stringp byte-compile-dest-file))
72 (cons (file-name-directory byte-compile-dest-file) load-path)
73 load-path)))
74 (load "cc-bytecomp" nil t)))
76 (cc-require 'cc-defs)
77 (cc-require 'cc-engine)
78 (cc-require 'cc-styles)
82 (defcustom c-guess-offset-threshold 10
83 "Threshold of acceptable offsets when examining indent information.
84 Discard an examined offset if its absolute value is greater than this.
86 The offset of a line included in the indent information returned by
87 `c-guess-basic-syntax'."
88 :version "24.1"
89 :type 'integer
90 :group 'c)
92 (defcustom c-guess-region-max 50000
93 "The maximum region size for examining indent information with `c-guess'.
94 It takes a long time to examine indent information from a large region;
95 this option helps you limit that time. `nil' means no limit."
96 :version "24.1"
97 :type 'integer
98 :group 'c)
101 ;;;###autoload
102 (defvar c-guess-guessed-offsets-alist nil
103 "Currently guessed offsets-alist.")
104 ;;;###autoload
105 (defvar c-guess-guessed-basic-offset nil
106 "Currently guessed basic-offset.")
108 (defvar c-guess-accumulator nil)
109 ;; Accumulated examined indent information. Information is represented
110 ;; in a list. Each element in it has following structure:
112 ;; (syntactic-symbol ((indentation-offset1 . number-of-times1)
113 ;; (indentation-offset2 . number-of-times2)
114 ;; ...))
116 ;; This structure is built by `c-guess-accumulate-offset'.
118 ;; Here we call the pair (indentation-offset1 . number-of-times1) a
119 ;; counter. `c-guess-sort-accumulator' sorts the order of
120 ;; counters by number-of-times.
121 ;; Use `c-guess-dump-accumulator' to see the value.
123 (defconst c-guess-conversions
124 '((c . c-lineup-C-comments)
125 (inher-cont . c-lineup-multi-inher)
126 (string . -1000)
127 (comment-intro . c-lineup-comment)
128 (arglist-cont-nonempty . c-lineup-arglist)
129 (arglist-close . c-lineup-close-paren)
130 (cpp-macro . -1000)))
133 ;;;###autoload
134 (defun c-guess (&optional accumulate)
135 "Guess the style in the region up to `c-guess-region-max', and install it.
137 The style is given a name based on the file's absolute file name.
139 If given a prefix argument (or if the optional argument ACCUMULATE is
140 non-nil) then the previous guess is extended, otherwise a new guess is
141 made from scratch."
142 (interactive "P")
143 (c-guess-region (point-min)
144 (min (point-max) (or c-guess-region-max
145 (point-max)))
146 accumulate))
148 ;;;###autoload
149 (defun c-guess-no-install (&optional accumulate)
150 "Guess the style in the region up to `c-guess-region-max'; don't install it.
152 If given a prefix argument (or if the optional argument ACCUMULATE is
153 non-nil) then the previous guess is extended, otherwise a new guess is
154 made from scratch."
155 (interactive "P")
156 (c-guess-region-no-install (point-min)
157 (min (point-max) (or c-guess-region-max
158 (point-max)))
159 accumulate))
161 ;;;###autoload
162 (defun c-guess-buffer (&optional accumulate)
163 "Guess the style on the whole current buffer, and install it.
165 The style is given a name based on the file's absolute file name.
167 If given a prefix argument (or if the optional argument ACCUMULATE is
168 non-nil) then the previous guess is extended, otherwise a new guess is
169 made from scratch."
170 (interactive "P")
171 (c-guess-region (point-min)
172 (point-max)
173 accumulate))
175 ;;;###autoload
176 (defun c-guess-buffer-no-install (&optional accumulate)
177 "Guess the style on the whole current buffer; don't install it.
179 If given a prefix argument (or if the optional argument ACCUMULATE is
180 non-nil) then the previous guess is extended, otherwise a new guess is
181 made from scratch."
182 (interactive "P")
183 (c-guess-region-no-install (point-min)
184 (point-max)
185 accumulate))
187 ;;;###autoload
188 (defun c-guess-region (start end &optional accumulate)
189 "Guess the style on the region and install it.
191 The style is given a name based on the file's absolute file name.
193 If given a prefix argument (or if the optional argument ACCUMULATE is
194 non-nil) then the previous guess is extended, otherwise a new guess is
195 made from scratch."
196 (interactive "r\nP")
197 (c-guess-region-no-install start end accumulate)
198 (c-guess-install))
201 (defsubst c-guess-empty-line-p ()
202 (eq (line-beginning-position)
203 (line-end-position)))
205 ;;;###autoload
206 (defun c-guess-region-no-install (start end &optional accumulate)
207 "Guess the style on the region; don't install it.
209 Every line of code in the region is examined and values for the following two
210 variables are guessed:
212 * `c-basic-offset', and
213 * the indentation values of the various syntactic symbols in
214 `c-offsets-alist'.
216 The guessed values are put into `c-guess-guessed-basic-offset' and
217 `c-guess-guessed-offsets-alist'.
219 Frequencies of use are taken into account when guessing, so minor
220 inconsistencies in the indentation style shouldn't produce wrong guesses.
222 If given a prefix argument (or if the optional argument ACCUMULATE is
223 non-nil) then the previous examination is extended, otherwise a new
224 guess is made from scratch.
226 Note that the larger the region to guess in, the slower the guessing.
227 So you can limit the region with `c-guess-region-max'."
228 (interactive "r\nP")
229 (let ((accumulator (when accumulate c-guess-accumulator)))
230 (setq c-guess-accumulator (c-guess-examine start end accumulator))
231 (let ((pair (c-guess-guess c-guess-accumulator)))
232 (setq c-guess-guessed-basic-offset (car pair)
233 c-guess-guessed-offsets-alist (cdr pair)))))
236 (defun c-guess-examine (start end accumulator)
237 (let ((reporter (when (fboundp 'make-progress-reporter)
238 (make-progress-reporter "Examining Indentation "
239 start
240 end))))
241 (save-excursion
242 (goto-char start)
243 (while (< (point) end)
244 (unless (c-guess-empty-line-p)
245 (mapc (lambda (s)
246 (setq accumulator (or (c-guess-accumulate accumulator s)
247 accumulator)))
248 (c-save-buffer-state () (c-guess-basic-syntax))))
249 (when reporter (progress-reporter-update reporter (point)))
250 (forward-line 1)))
251 (when reporter (progress-reporter-done reporter)))
252 (c-guess-sort-accumulator accumulator))
254 (defun c-guess-guess (accumulator)
255 ;; Guess basic-offset and offsets-alist from ACCUMULATOR,
256 ;; then return them as a cons: (basic-offset . offsets-alist).
257 ;; See the comments at `c-guess-accumulator' about the format
258 ;; ACCUMULATOR.
259 (let* ((basic-offset (c-guess-make-basic-offset accumulator))
260 (typical-offsets-alist (c-guess-make-offsets-alist
261 accumulator))
262 (symbolic-offsets-alist (c-guess-symbolize-offsets-alist
263 typical-offsets-alist
264 basic-offset))
265 (merged-offsets-alist (c-guess-merge-offsets-alists
266 (copy-tree c-guess-conversions)
267 symbolic-offsets-alist)))
268 (cons basic-offset merged-offsets-alist)))
270 (defun c-guess-current-offset (relpos)
271 ;; Calculate relative indentation (point) to RELPOS.
272 (- (progn (back-to-indentation)
273 (current-column))
274 (save-excursion
275 (goto-char relpos)
276 (current-column))))
278 (defun c-guess-accumulate (accumulator syntax-element)
279 ;; Add SYNTAX-ELEMENT to ACCUMULATOR.
280 (let ((symbol (car syntax-element))
281 (relpos (cadr syntax-element)))
282 (when (numberp relpos)
283 (let ((offset (c-guess-current-offset relpos)))
284 (when (< (abs offset) c-guess-offset-threshold)
285 (c-guess-accumulate-offset accumulator
286 symbol
287 offset))))))
289 (defun c-guess-accumulate-offset (accumulator symbol offset)
290 ;; Added SYMBOL and OFFSET to ACCUMULATOR. See
291 ;; `c-guess-accumulator' about the structure of ACCUMULATOR.
292 (let* ((entry (assoc symbol accumulator))
293 (counters (cdr entry))
294 counter)
295 (if entry
296 (progn
297 (setq counter (assoc offset counters))
298 (if counter
299 (setcdr counter (1+ (cdr counter)))
300 (setq counters (cons (cons offset 1) counters))
301 (setcdr entry counters))
302 accumulator)
303 (cons (cons symbol (cons (cons offset 1) nil)) accumulator))))
305 (defun c-guess-sort-accumulator (accumulator)
306 ;; Sort each element of ACCUMULATOR by the number-of-times. See
307 ;; `c-guess-accumulator' for more details.
308 (mapcar
309 (lambda (entry)
310 (let ((symbol (car entry))
311 (counters (cdr entry)))
312 (cons symbol (sort counters
313 (lambda (a b)
314 (if (> (cdr a) (cdr b))
316 (and
317 (eq (cdr a) (cdr b))
318 (< (car a) (car b)))))))))
319 accumulator))
321 (defun c-guess-make-offsets-alist (accumulator)
322 ;; Throw away the rare cases in accumulator and make an offsets-alist structure.
323 (mapcar
324 (lambda (entry)
325 (cons (car entry)
326 (car (car (cdr entry)))))
327 accumulator))
329 (defun c-guess-merge-offsets-alists (strong weak)
330 ;; Merge two offsets-alists into one.
331 ;; When two offsets-alists have the same symbol
332 ;; entry, give STRONG priority over WEAK.
333 (mapc
334 (lambda (weak-elt)
335 (unless (assoc (car weak-elt) strong)
336 (setq strong (cons weak-elt strong))))
337 weak)
338 strong)
340 (defun c-guess-make-basic-offset (accumulator)
341 ;; As candidate for `c-basic-offset', find the most frequently appearing
342 ;; indentation-offset in ACCUMULATOR.
343 (let* (;; Drop the value related to `c' syntactic-symbol.
344 ;; (`c': Inside a multiline C style block comment.)
345 ;; The impact for values of `c' is too large for guessing
346 ;; `basic-offset' if the target source file is small and its license
347 ;; notice is at top of the file.
348 (accumulator (assq-delete-all 'c (copy-tree accumulator)))
349 ;; Drop syntactic-symbols from ACCUMULATOR.
350 (alist (apply #'append (mapcar (lambda (elts)
351 (mapcar (lambda (elt)
352 (cons (abs (car elt))
353 (cdr elt)))
354 (cdr elts)))
355 accumulator)))
356 ;; Gather all indentation-offsets other than 0.
357 ;; 0 is meaningless as `basic-offset'.
358 (offset-list (delete 0
359 (delete-dups (mapcar
360 (lambda (elt) (car elt))
361 alist))))
362 ;; Sum of number-of-times for offset:
363 ;; (offset . sum)
364 (summed (mapcar (lambda (offset)
365 (cons offset
366 (apply #'+
367 (mapcar (lambda (a)
368 (if (eq (car a) offset)
369 (cdr a)
371 alist))))
372 offset-list)))
374 ;; Find the majority.
376 (let ((majority '(nil . 0)))
377 (while summed
378 (when (< (cdr majority) (cdr (car summed)))
379 (setq majority (car summed)))
380 (setq summed (cdr summed)))
381 (car majority))))
383 (defun c-guess-symbolize-offsets-alist (offsets-alist basic-offset)
384 ;; Convert the representation of OFFSETS-ALIST to an alist using
385 ;; `+', `-', `++', `--', `*', or `/'. These symbols represent
386 ;; a value relative to BASIC-OFFSET. Their meaning can be found
387 ;; in the CC Mode manual.
388 (mapcar
389 (lambda (elt)
390 (let ((s (car elt))
391 (v (cdr elt)))
392 (cond
393 ((integerp v)
394 (cons s (c-guess-symbolize-integer v
395 basic-offset)))
396 (t elt))))
397 offsets-alist))
399 (defun c-guess-symbolize-integer (int basic-offset)
400 (let ((aint (abs int)))
401 (cond
402 ((eq int basic-offset) '+)
403 ((eq aint basic-offset) '-)
404 ((eq int (* 2 basic-offset)) '++)
405 ((eq aint (* 2 basic-offset)) '--)
406 ((eq (* 2 int) basic-offset) '*)
407 ((eq (* 2 aint) basic-offset) '-)
408 (t int))))
410 (defun c-guess-style-name ()
411 ;; Make a style name for the guessed style.
412 (format "*c-guess*:%s" (buffer-file-name)))
414 (defun c-guess-make-style (basic-offset offsets-alist)
415 (when basic-offset
416 ;; Make a style from guessed values.
417 (let* ((offsets-alist (c-guess-merge-offsets-alists
418 offsets-alist
419 c-offsets-alist)))
420 `((c-basic-offset . ,basic-offset)
421 (c-offsets-alist . ,offsets-alist)))))
423 ;;;###autoload
424 (defun c-guess-install (&optional style-name)
425 "Install the latest guessed style into the current buffer.
426 \(This guessed style is a combination of `c-guess-guessed-basic-offset',
427 `c-guess-guessed-offsets-alist' and `c-offsets-alist'.)
429 The style is entered into CC Mode's style system by
430 `c-add-style'. Its name is either STYLE-NAME, or a name based on
431 the absolute file name of the file if STYLE-NAME is nil."
432 (interactive "sNew style name (empty for default name): ")
433 (let* ((style (c-guess-make-style c-guess-guessed-basic-offset
434 c-guess-guessed-offsets-alist)))
435 (if style
436 (let ((style-name (or (if (equal style-name "")
438 style-name)
439 (c-guess-style-name))))
440 (c-add-style style-name style t)
441 (message "Style \"%s\" is installed" style-name))
442 (error "Not yet guessed"))))
444 (defun c-guess-dump-accumulator ()
445 "Show `c-guess-accumulator'."
446 (interactive)
447 (with-output-to-temp-buffer "*Accumulated Examined Indent Information*"
448 (pp c-guess-accumulator)))
450 (defun c-guess-reset-accumulator ()
451 "Reset `c-guess-accumulator'."
452 (interactive)
453 (setq c-guess-accumulator nil))
455 (defun c-guess-dump-guessed-values ()
456 "Show `c-guess-guessed-basic-offset' and `c-guess-guessed-offsets-alist'."
457 (interactive)
458 (with-output-to-temp-buffer "*Guessed Values*"
459 (princ "basic-offset: \n\t")
460 (pp c-guess-guessed-basic-offset)
461 (princ "\n\n")
462 (princ "offsets-alist: \n")
463 (pp c-guess-guessed-offsets-alist)
466 (defun c-guess-dump-guessed-style (&optional printer)
467 "Show the guessed style.
468 `pp' is used to print the style but if PRINTER is given,
469 PRINTER is used instead. If PRINTER is not `nil', it
470 is called with one argument, the guessed style."
471 (interactive)
472 (let ((style (c-guess-make-style c-guess-guessed-basic-offset
473 c-guess-guessed-offsets-alist)))
474 (if style
475 (with-output-to-temp-buffer "*Guessed Style*"
476 (funcall (if printer printer 'pp) style))
477 (error "Not yet guessed"))))
479 (defun c-guess-guessed-syntactic-symbols ()
480 ;; Return syntactic symbols in c-guess-guessed-offsets-alist
481 ;; but not in c-guess-conversions.
482 (let ((alist c-guess-guessed-offsets-alist)
484 (symbols nil))
485 (while alist
486 (setq elt (car alist)
487 alist (cdr alist))
488 (unless (assq (car elt) c-guess-conversions)
489 (setq symbols (cons (car elt)
490 symbols))))
491 symbols))
493 (defun c-guess-view-reorder-offsets-alist-in-style (style guessed-syntactic-symbols)
494 ;; Reorder the `c-offsets-alist' field of STYLE.
495 ;; If an entry in `c-offsets-alist' holds a guessed value, move it to
496 ;; front in the field. In addition alphabetical sort by entry name is done.
497 (setq style (copy-tree style))
498 (let ((offsets-alist-cell (assq 'c-offsets-alist style))
499 (guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
500 (setcdr offsets-alist-cell
501 (sort (cdr offsets-alist-cell)
502 (lambda (a b)
503 (let ((a-guessed? (memq (car a) guessed-syntactic-symbols))
504 (b-guessed? (memq (car b) guessed-syntactic-symbols)))
505 (cond
506 ((or (and a-guessed? b-guessed?)
507 (not (or a-guessed? b-guessed?)))
508 (string-lessp (symbol-name (car a))
509 (symbol-name (car b))))
510 (a-guessed? t)
511 (b-guessed? nil)))))))
512 style)
514 (defun c-guess-view-mark-guessed-entries (guessed-syntactic-symbols)
515 ;; Put " ; Guess value" markers on all entries which hold
516 ;; guessed values.
517 ;; `c-basic-offset' is always considered as holding a guessed value.
518 (let ((needs-markers (cons 'c-basic-offset
519 guessed-syntactic-symbols)))
520 (while needs-markers
521 (goto-char (point-min))
522 (when (search-forward (concat "("
523 (symbol-name (car needs-markers))
524 " ") nil t)
525 (move-end-of-line 1)
526 (comment-dwim nil)
527 (insert " Guessed value"))
528 (setq needs-markers
529 (cdr needs-markers)))))
531 (defun c-guess-view (&optional with-name)
532 "Emit emacs lisp code which defines the last guessed style.
533 So you can put the code into .emacs if you prefer the
534 guessed code.
535 \"STYLE NAME HERE\" is used as the name for the style in the
536 emitted code. If WITH-NAME is given, it is used instead.
537 WITH-NAME is expected as a string but if this function
538 called interactively with prefix argument, the value for
539 WITH-NAME is asked to the user."
540 (interactive "P")
541 (let* ((temporary-style-name (cond
542 ((stringp with-name) with-name)
543 (with-name (read-from-minibuffer
544 "New style name: "))
546 "STYLE NAME HERE")))
547 (guessed-style-name (c-guess-style-name))
548 (current-style-name c-indentation-style)
549 (parent-style-name (if (string-equal guessed-style-name
550 current-style-name)
551 ;; The guessed style is already installed.
552 ;; It cannot be used as the parent style.
553 ;; Use the default style for the current
554 ;; major mode as the parent style.
555 (cc-choose-style-for-mode
556 major-mode
557 c-default-style)
558 ;; The guessed style is not installed yet.
559 current-style-name)))
560 (c-guess-dump-guessed-style
561 (lambda (style)
562 (let ((guessed-syntactic-symbols (c-guess-guessed-syntactic-symbols)))
563 (pp `(c-add-style ,temporary-style-name
564 ',(cons parent-style-name
565 (c-guess-view-reorder-offsets-alist-in-style
566 style
567 guessed-syntactic-symbols))))
568 (with-current-buffer standard-output
569 (lisp-interaction-mode)
570 (c-guess-view-mark-guessed-entries
571 guessed-syntactic-symbols)
572 (buffer-enable-undo)))))))
575 (cc-provide 'cc-guess)
576 ;;; cc-guess.el ends here