Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / cedet / semantic / wisent / python.el
blob086c4412b7b12166aace46bd52794a70bba53bae
1 ;;; wisent-python.el --- Semantic support for Python
3 ;; Copyright (C) 2002, 2004, 2006-2014 Free Software Foundation, Inc.
5 ;; Author: Richard Kim <emacs18@gmail.com>
6 ;; Maintainer: Richard Kim <emacs18@gmail.com>
7 ;; Created: June 2002
8 ;; Keywords: syntax
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; Parser support for Python.
29 ;;; Code:
31 (require 'rx)
33 ;; Try to load python support, but fail silently since it is only used
34 ;; for optional functionality
35 (require 'python nil t)
37 (require 'semantic/wisent)
38 (require 'semantic/wisent/python-wy)
39 (require 'semantic/find)
40 (require 'semantic/dep)
41 (require 'semantic/ctxt)
42 (require 'semantic/format)
44 (eval-when-compile
45 (require 'cl))
47 ;;; Customization
(defun semantic-python-get-system-include-path ()
  "Return the entries of Python's `sys.path' that are existing directories.
The path is obtained by sending a one-line script to
`python-shell-internal-send-string' and splitting its output on
newlines; entries that are not directories on this machine are
dropped."
  (let ((existing nil))
    (dolist (entry (split-string
                    (python-shell-internal-send-string
                     "import sys;print ('\\n'.join(sys.path))")
                    "\n" t))
      ;; Keep only the entries that really exist as directories.
      (when (file-directory-p entry)
        (push entry existing)))
    (nreverse existing)))
(defcustom-mode-local-semantic-dependency-system-include-path
  python-mode semantic-python-dependency-system-include-path
  ;; Query Python only when python.el is loaded and Emacs runs
  ;; interactively: python-mode and batch somehow often hangs.
  (and (featurep 'python)
       (not noninteractive)
       (semantic-python-get-system-include-path))
  "The system include path used by Python language.")
70 ;;; Lexical analysis
;; Python string literals are delimited by either single quotes or
;; double quotes, e.g., "I'm a string" and 'I too am a string', and
;; may be triple-quoted.
;; A literal may carry a prefix:
;;   u / U   the string is Unicode,
;;   b / B   the literal is a bytes literal,
;;   r / R   raw, i.e., normal backslash substitutions are suppressed.
;;           For example, r"01\n34" is a string with the six
;;           characters 0, 1, \, n, 3 and 4.
;; The raw marker may be combined with the others as ur, br or rb.
(defconst wisent-python-string-start-re
  "\\(?:[bBuU][rR]?\\|[rR][bB]?\\)?['\"]"
  "Regexp matching beginning of a Python string.
It matches the optional prefix letters and the first quote character.")

(defconst wisent-python-string-re
  (rx
   ;; Optional prefix; must accept exactly what
   ;; `wisent-python-string-start-re' accepts.
   (opt (or (: (any "bBuU") (opt (any "rR")))
            (: (any "rR") (opt (any "bB")))))
   (or
    ;; Triple-quoted string using apostrophes
    (: "'''" (zero-or-more (or "\\'"
                               (not (any "'"))
                               (: (repeat 1 2 "'") (not (any "'")))))
       "'''")
    ;; String using apostrophes
    (: "'" (zero-or-more (or "\\'"
                             (not (any "'"))))
       "'")
    ;; Triple-quoted string using quotation marks.
    (: "\"\"\"" (zero-or-more (or "\\\""
                                  (not (any "\""))
                                  (: (repeat 1 2 "\"") (not (any "\"")))))
       "\"\"\"")
    ;; String using quotation marks.
    (: "\"" (zero-or-more (or "\\\""
                              (not (any "\""))))
       "\"")))
  "Regexp matching a complete Python string.
The match starts at the optional prefix letters and ends after the
closing quote(s).")
(defvar wisent-python-EXPANDING-block nil
  "Non-nil while the Python lexical analyzer expands a paren block.
`wisent-python-implicit-line-joining-p' returns the value of this
variable.")
(defun wisent-python-implicit-line-joining-p ()
  "Return non-nil when implicit line joining is in effect.
This is the case inside an expression enclosed in parentheses,
square brackets or curly braces; the answer is the current value of
`wisent-python-EXPANDING-block'."
  wisent-python-EXPANDING-block)
(defsubst wisent-python-forward-string ()
  "Move point at the end of the Python string at point.
Point must be at the start of the literal, including any prefix
letters.  Signal an error with the message \"unterminated syntax\"
when no complete string literal starts at point."
  (if (looking-at wisent-python-string-re)
      (let ((end (match-end 0))
            ;; Position of the opening quote.  The regexp matched at
            ;; point, so the letters skipped here are exactly the
            ;; string prefix (if any).
            (quote-start (save-excursion
                           (skip-chars-forward "a-zA-Z")
                           (point))))
        ;; Incomplete triple-quoted string gets matched instead as a
        ;; complete single quoted string.  (This special case would be
        ;; unnecessary if Emacs regular expressions had negative
        ;; look-ahead assertions.)  Detect it as an empty literal
        ;; immediately followed by a third quote of the same kind,
        ;; measured from the opening quote so that prefixed strings
        ;; are covered as well.
        (when (and (= (- end quote-start) 2)
                   (eq (char-after end) (char-after quote-start)))
          (error "unterminated syntax"))
        (goto-char end))
    (error "unterminated syntax")))
(defun wisent-python-forward-balanced-expression ()
  "Move point past the balanced expression that starts at point.
A balanced expression is anything delimited by characters of the
Emacs open/close parenthesis syntax classes.  `forward-sexp' cannot
be used for this because it does not understand Python's
triple-quoted strings."
  ;; The matching close character is recorded in the syntax
  ;; descriptor of the opening character at point.
  (let ((closer (cdr (syntax-after (point)))))
    (forward-char 1)
    (while (and (not (eobp))
                (not (eq (char-after) closer)))
      (cond
       ;; A string literal: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment: the rest of the line is irrelevant.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A nested balanced expression: recurse.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Anything else must be white space, word, symbol,
       ;; punctuation, paired delimiter (backquote), line
       ;; continuation or end of comment (newline) characters.
       (t
        (when (zerop (skip-syntax-forward "-w_.$\\>"))
          (error "can't figure out how to go forward from here")))))
    ;; Step over the closing character.  At end of buffer this
    ;; signals an error, which is the intended last resort when the
    ;; closing character was never found.
    (forward-char 1)))
(defun wisent-python-forward-line ()
  "Move point to the beginning of the next logical line.
A logical line normally ends with the physical line, but strings,
implicit or explicit line continuations, blank lines and comment
lines extend it; all of these are skipped.  When the current
logical line ends at the end of the buffer, point is left there."
  (while (not (eolp))
    (let ((before (point)))
      (cond
       ;; A string literal: jump to its end.
       ((looking-at wisent-python-string-start-re)
        (wisent-python-forward-string))
       ;; A comment: go straight to the end of the line.
       ((looking-at "\\s<")
        (end-of-line))
       ;; A parenthesized expression: skip it as a whole.
       ((looking-at "\\s(")
        (wisent-python-forward-balanced-expression))
       ;; Explicit line continuation (backslash): continue on the
       ;; next physical line.
       ((looking-at "\\s\\")
        (forward-line 1))
       ;; White space, word, symbol, punctuation and paired
       ;; delimiter (backquote) characters.
       (t
        (skip-syntax-forward "-w_.$)")))
      ;; Every iteration has to make progress.
      (when (= before (point))
        (error "python-forward-line endless loop detected"))))
  ;; Point is at end of line now: skip blank and comment lines.
  (forward-comment (point-max))
  ;; Go to the beginning of the line reached.
  (or (eobp) (beginning-of-line)))
(defun wisent-python-forward-line-skip-indented ()
  "Move point to the next logical line that is not indented deeper.
Logical lines indented more than the line point starts on are
skipped; point stops at the first line whose indentation is less
than or equal to it, or at the end of the buffer."
  (let ((base (current-indentation)))
    ;; Always advance by at least one logical line.
    (wisent-python-forward-line)
    (while (and (not (eobp))
                (> (current-indentation) base))
      (wisent-python-forward-line))))
(defun wisent-python-end-of-block ()
  "Move point to the end of the current block.
The block consists of the logical lines, starting at the current
one, whose indentation is at least that of the current line.
Comments that trail the block are not included."
  (let ((indent (current-indentation)))
    (while (and (not (eobp)) (>= (current-indentation) indent))
      (wisent-python-forward-line-skip-indented))
    ;; Don't include final comments in current block bounds
    (forward-comment (- (point-max)))
    ;; Finish at a line beginning so the block covers whole lines.
    (or (bolp) (forward-line 1))))
212 ;; Indentation stack, what the Python (2.3) language spec. says:
214 ;; The indentation levels of consecutive lines are used to generate
215 ;; INDENT and DEDENT tokens, using a stack, as follows.
217 ;; Before the first line of the file is read, a single zero is pushed
218 ;; on the stack; this will never be popped off again. The numbers
219 ;; pushed on the stack will always be strictly increasing from bottom
220 ;; to top. At the beginning of each logical line, the line's
221 ;; indentation level is compared to the top of the stack. If it is
222 ;; equal, nothing happens. If it is larger, it is pushed on the stack,
223 ;; and one INDENT token is generated. If it is smaller, it must be one
224 ;; of the numbers occurring on the stack; all numbers on the stack
225 ;; that are larger are popped off, and for each number popped off a
226 ;; DEDENT token is generated. At the end of the file, a DEDENT token
227 ;; is generated for each number remaining on the stack that is larger
228 ;; than zero.
;; Declared without a value: the `semantic-lex' override for
;; `python-mode' binds it to (list 0) around each run of the lexer.
(defvar wisent-python-indent-stack)
(define-lex-analyzer wisent-python-lex-beginning-of-line
  "Detect and create Python indentation tokens at beginning of line.
Compare the indentation of the line at point with the top of
`wisent-python-indent-stack' and push INDENT, INDENT_BLOCK or DEDENT
tokens accordingly.  Nothing is done inside an implicitly joined
line."
  (and
   (bolp) (not (wisent-python-implicit-line-joining-p))
   (let ((last-indent (car wisent-python-indent-stack))
         (last-pos (point))
         (curr-indent (current-indentation)))
     (skip-syntax-forward "-")
     (cond
      ;; Skip comments and blank lines.  No change in indentation.
      ((or (eolp) (looking-at semantic-lex-comment-regex))
       (forward-comment (point-max))
       (or (eobp) (beginning-of-line))
       (setq semantic-lex-end-point (point))
       ;; Loop lexer to handle the next line.
       t)
      ;; No change in indentation.
      ((= curr-indent last-indent)
       (setq semantic-lex-end-point (point))
       ;; Try next analyzers.
       nil)
      ;; Indentation increased
      ((> curr-indent last-indent)
       (if (or (not semantic-lex-maximum-depth)
               (< semantic-lex-current-depth semantic-lex-maximum-depth))
           (progn
             ;; Return an INDENT lexical token
             (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
             (push curr-indent wisent-python-indent-stack)
             (semantic-lex-push-token
              (semantic-lex-token 'INDENT last-pos (point))))
         ;; Add an INDENT_BLOCK token
         (semantic-lex-push-token
          (semantic-lex-token
           'INDENT_BLOCK
           (progn (beginning-of-line) (point))
           (semantic-lex-unterminated-syntax-protection 'INDENT_BLOCK
             (wisent-python-end-of-block)
             (point)))))
       ;; Loop lexer to handle tokens in current line.
       t)
      ;; Indentation decreased
      ((progn
         ;; Pop items from indentation stack
         (while (< curr-indent last-indent)
           (pop wisent-python-indent-stack)
           (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
                 last-indent (car wisent-python-indent-stack))
           (semantic-lex-push-token
            (semantic-lex-token 'DEDENT last-pos (point))))
         (= last-pos (point)))
       ;; If pos did not change, then we must return nil so that
       ;; other lexical analyzers can be run.
       nil))))
  ;; All the work was done in the above analyzer matching condition.
  )
(define-lex-regex-analyzer wisent-python-lex-end-of-line
  "Detect newlines and create Python NEWLINE tokens.
When the following line implicitly continues the current one, the
newline character is skipped without producing a token."
  "\\(\n\\|\\s>\\)"
  (cond
   ;; Inside brackets the newline has no meaning: just step over it.
   ((wisent-python-implicit-line-joining-p)
    (setq semantic-lex-end-point (match-end 0)))
   ;; Otherwise the newline ends a logical line.
   (t
    (semantic-lex-push-token
     (semantic-lex-token 'NEWLINE (point) (match-end 0))))))
(define-lex-regex-analyzer wisent-python-lex-string
  "Detect Python string literals and create STRING_LITERAL tokens.
The token extends from point to the position reached by
`wisent-python-forward-string'."
  wisent-python-string-start-re
  (let ((literal-beg (point)))
    (semantic-lex-push-token
     (semantic-lex-token
      'STRING_LITERAL
      literal-beg
      (semantic-lex-unterminated-syntax-protection 'STRING_LITERAL
        (wisent-python-forward-string)
        (point))))))
(define-lex-regex-analyzer wisent-python-lex-ignore-backslash
  "Detect and skip backslash (explicit line joining) tokens.
A backslash has to be the last token of a physical line; anywhere
else on a line outside a string literal it is illegal."
  "\\s\\\\s-*$"
  ;; Resume lexing at the first non-whitespace character of the next
  ;; physical line.
  (setq semantic-lex-end-point
        (progn
          (forward-line 1)
          (skip-syntax-forward "-")
          (point))))
(define-lex wisent-python-lexer
  "Lexical analyzer for Python code.
The analyzers below are listed in the order in which they are tried."
  ;; Indentation is handled first, at the beginning of each line.
  wisent-python-lex-beginning-of-line
  wisent-python-lex-end-of-line
  ;; Strings come before symbols so that string prefixes are not
  ;; taken for identifiers.
  wisent-python-lex-string
  ;; Analyzers generated automatically from the grammar.
  wisent-python-wy--<number>-regexp-analyzer
  wisent-python-wy--<keyword>-keyword-analyzer
  wisent-python-wy--<symbol>-regexp-analyzer
  wisent-python-wy--<block>-block-analyzer
  wisent-python-wy--<punctuation>-string-analyzer
  ;; Things that produce no token.
  wisent-python-lex-ignore-backslash
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-comments
  ;; Anything left over is unhandled syntax: signal an error.
  semantic-lex-default-action)
341 ;;; Parsing
(defun wisent-python-reconstitute-function-tag (tag suite)
  "Move a docstring from TAG's members into its :documentation attribute.
SUITE is the list of statement tags forming the function body.
Set attributes for constructors, special, private and static methods.
Return TAG."
  ;; Analyze first statement to see whether it is a documentation
  ;; string.
  (let ((first-statement (car suite)))
    (when (semantic-python-docstring-p first-statement)
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-statement))))

  ;; TODO HACK: we try to identify methods using the following
  ;; heuristic:
  ;; + at least one argument
  ;; + first argument is self
  ;; `car' is used instead of cl's `first', because cl is only
  ;; required at compile time by this file.
  (let ((arguments (semantic-tag-function-arguments tag)))
    (when (and arguments
               (string= (semantic-tag-name (car arguments)) "self"))
      (semantic-tag-put-attribute tag :parent "dummy")))

  ;; Identify constructors, special and private functions
  (cond
   ;; TODO only valid when the function resides inside a class
   ((string= (semantic-tag-name tag) "__init__")
    (semantic-tag-put-attribute tag :constructor-flag t)
    ;; Keep the body around: `wisent-python-reconstitute-class-tag'
    ;; reads it to find instance variable assignments.
    (semantic-tag-put-attribute tag :suite suite))

   ((semantic-python-special-p tag)
    (semantic-tag-put-attribute tag :special-flag t))

   ((semantic-python-private-p tag)
    (semantic-tag-put-attribute tag :protection "private")))

  ;; If there is a staticmethod decorator, add a static typemodifier
  ;; for the function.
  (when (semantic-find-tags-by-name
         "staticmethod"
         (semantic-tag-get-attribute tag :decorators))
    (semantic-tag-put-attribute
     tag :typemodifiers
     (cons "static"
           (semantic-tag-get-attribute tag :typemodifiers))))

  ;; TODO
  ;; + check for decorators classmethod
  ;; + check for operators
  tag)
(defun wisent-python-reconstitute-class-tag (tag)
  "Move a docstring from TAG's members into its :documentation attribute.
Also add a variable tag to TAG's members for each instance variable
assigned in the body of a constructor.  Return TAG."
  ;; The first member of TAG may be a documentation string.  If that is
  ;; the case, remove it from the members list and stick its
  ;; content into the :documentation attribute.
  (let ((first-member (car (semantic-tag-type-members tag))))
    (when (semantic-python-docstring-p first-member)
      (semantic-tag-put-attribute
       tag :members
       (cdr (semantic-tag-type-members tag)))
      (semantic-tag-put-attribute
       tag :documentation
       (semantic-python-extract-docstring first-member))))

  ;; Try to find the constructor, determine the name of the instance
  ;; parameter, find assignments to instance variables and add
  ;; corresponding variable tags to the list of members.
  (dolist (member (semantic-tag-type-members tag))
    (when (semantic-tag-function-constructor-p member)
      (let* ((self (semantic-tag-name
                    (car (semantic-tag-function-arguments member))))
             ;; Length of the "SELF." prefix to strip from the
             ;; statement names.  The instance parameter is not
             ;; necessarily called "self", so do not hard-code 5.
             (prefix-length (1+ (length (or self "self")))))
        (dolist (statement (semantic-tag-get-attribute member :suite))
          (when (semantic-python-instance-variable-p statement self)
            (let ((variable (semantic-tag-clone
                             statement
                             (substring (semantic-tag-name statement)
                                        prefix-length)))
                  (members (semantic-tag-get-attribute tag :members)))
              (when (semantic-python-private-p variable)
                (semantic-tag-put-attribute variable :protection "private"))
              ;; Append destructively at the end of the members list.
              (setcdr (last members) (list variable))))))))

  ;; TODO remove the :suite attribute
  tag)
(defun semantic-python-expand-tag (tag)
  "Expand compound declarations found in TAG into separate tags.
TAG contains compound declaration if the NAME part of the tag is
a list.  In python, this can happen with `import' statements.
Return the list of expanded tags, one clone of TAG per name and in
the same order, or nil when TAG is not a compound include tag."
  (let ((names (semantic-tag-name tag)))
    (when (and (eq (semantic-tag-class tag) 'include)
               (listp names))
      (mapcar (lambda (name) (semantic-tag-clone tag name))
              names))))
443 ;;; Overridden Semantic API.
(define-mode-local-override semantic-lex python-mode
  (start end &optional depth length)
  "Lexically analyze Python code in current buffer.
START, END, DEPTH and LENGTH have the meaning documented for the
function `semantic-lex'.
The analysis proper is done by `wisent-python-lexer'; afterwards one
Python DEDENT token is emitted for each positive indentation level
left on `wisent-python-indent-stack'."
  (let* ((wisent-python-indent-stack (list 0))
         (tokens (wisent-python-lexer start end depth length))
         ;; Collect the trailing DEDENT tokens in a fresh stream.
         (semantic-lex-token-stream nil))
    ;; The zero at the bottom of the stack terminates the loop.
    (while (< 0 (pop wisent-python-indent-stack))
      (semantic-lex-push-token (semantic-lex-token 'DEDENT end end)))
    (nconc tokens (nreverse semantic-lex-token-stream))))
(define-mode-local-override semantic-get-local-variables python-mode ()
  "Return the local variables visible at point.
Not implemented for Python yet: always return nil."
  nil)
467 ;; Adapted from the semantic Java support by Andrey Torba
(define-mode-local-override semantic-tag-include-filename python-mode (tag)
  "Return a suitable path for (some) Python imports.
The dots in the name of TAG are replaced by slashes and \".py\" is
appended."
  (let* ((module (semantic-tag-name tag))
         (components (split-string module "\\.")))
    (concat (mapconcat #'identity components "/") ".py")))
473 ;; Override ctxt-current-function/assignment defaults, since they do
474 ;; not work properly with Python code, even leading to endless loops
475 ;; (see bug #xxxxx).
(define-mode-local-override semantic-ctxt-current-function python-mode (&optional point)
  "Return the function call whose arguments contain the cursor at POINT.
The symbol of a function is not returned when the cursor is on the
text that names it.
This Python override always returns nil."
  nil)
(define-mode-local-override semantic-ctxt-current-assignment python-mode (&optional point)
  "Return the assignment near the cursor at POINT.
The value is a list as per `semantic-ctxt-current-symbol', or nil if
there is nothing relevant.
This Python override always returns nil."
  nil)
489 ;;; Tag Formatting
(define-mode-local-override semantic-format-tag-abbreviate python-mode (tag &optional parent color)
  "Format an abbreviated tag for python.
Tags of class `code' are shortened to the first line of their name.
All other tags are passed, together with PARENT and COLOR, to
`semantic-format-tag-abbreviate-default'."
  (cond ((semantic-tag-of-class-p tag 'code)
         ;; Just take the first line.
         (let ((name (semantic-tag-name tag)))
           (when (string-match "\n" name)
             (setq name (substring name 0 (match-beginning 0))))
           name))
        (t
         (semantic-format-tag-abbreviate-default tag parent color))))
503 ;;; Enable Semantic in `python-mode'.
506 ;;;###autoload
(defun wisent-python-default-setup ()
  "Setup buffer for parse.
Install the parser generated from the Python grammar and set the
comment, Semantic and imenu variables used when parsing Python."
  (wisent-python-wy--install-parser)
  (set (make-local-variable 'parse-sexp-ignore-comments) t)
  ;; Give python modes the possibility to overwrite this:
  (if (not comment-start-skip)
      (set (make-local-variable 'comment-start-skip) "#+\\s-*"))
  (setq
   ;; Character used to separate a parent/child relationship
   semantic-type-relation-separator-character '(".")
   semantic-command-separation-character ";"
   ;; Parsing
   semantic-tag-expand-function 'semantic-python-expand-tag

   ;; Semantic to take over from the one provided by python.
   ;; The python one, if it uses the senator advice, will hang
   ;; Emacs unrecoverably.
   imenu-create-index-function 'semantic-create-imenu-index

   ;; I need a python guru to update this list:
   semantic-symbol->name-assoc-list-for-type-parts '((variable . "Variables")
                                                     (function . "Methods"))
   semantic-symbol->name-assoc-list '((type . "Classes")
                                      (variable . "Variables")
                                      (function . "Functions")
                                      (include . "Imports")
                                      (package . "Package")
                                      (code . "Code"))))
537 ;;;###autoload
(add-hook 'python-mode-hook #'wisent-python-default-setup)
;; Declare the newer python modes as children of `python-mode' so
;; that they pull in the same mode-local overrides.
(define-child-mode python-2-mode python-mode "Python 2 mode")
(define-child-mode python-3-mode python-mode "Python 3 mode")
546 ;;; Utility functions
(defun semantic-python-special-p (tag)
  "Return non-nil if the name of TAG is a special identifier.
Special identifiers have the form __NAME__."
  (string-match
   ;; NAME consists of word and symbol constituents.  The symbol
   ;; syntax class alone does not include letters and digits, so both
   ;; classes have to be accepted for names such as __str__ to match.
   (rx (seq string-start
            "__"
            (1+ (or (syntax word) (syntax symbol)))
            "__"
            string-end))
   (semantic-tag-name tag)))
(defun semantic-python-private-p (tag)
  "Return non-nil if the name of TAG follows the private convention.
By convention private names have the form _NAME."
  (string-match
   ;; NAME consists of word and symbol constituents.  The symbol
   ;; syntax class alone does not include letters and digits, so both
   ;; classes have to be accepted for names such as _helper to match.
   (rx (seq string-start
            "_"
            (0+ (or (syntax word) (syntax symbol)))
            string-end))
   (semantic-tag-name tag)))
(defun semantic-python-instance-variable-p (tag &optional self)
  "Return non-nil if TAG is an instance variable of the instance SELF.
SELF is the name of the instance; when it is nil, the name \"self\"
is used.  TAG qualifies when it is a variable tag whose name is SELF
followed by a dot and a remainder that contains no further dot."
  (when (semantic-tag-of-class-p tag 'variable)
    (let ((name (semantic-tag-name tag)))
      (when (string-match
             (rx-to-string
              `(seq string-start ,(or self "self") "."))
             name)
        ;; Look for another dot after the matched \"SELF.\" prefix.
        ;; The end of the match is used rather than a fixed offset
        ;; so that instance names other than \"self\" work.
        (not (string-match "\\." name (match-end 0)))))))
(defun semantic-python-docstring-p (tag)
  "Return non-nil, when TAG is a Python documentation string.
TAG is considered to be a documentation string if it is of class
`code' and its name is a triple-quoted string, delimited either by
three quotation marks or by three apostrophes."
  (let ((class (semantic-tag-class tag))
        (name (semantic-tag-name tag)))
    (and (eq class 'code)
         ;; Be safe against tags without a string name.
         (stringp name)
         (string-match
          (rx (seq string-start
                   (or (seq "\"\"\"" (0+ anything) "\"\"\"")
                       (seq "'''" (0+ anything) "'''"))
                   string-end))
          name))))
(defun semantic-python-extract-docstring (tag)
  "Return the text of the Python documentation string held by TAG.
The result is the name of TAG without its first three and last
three characters, i.e., without the triple-quote delimiters."
  (substring (semantic-tag-name tag) 3 -3))
593 ;;; Test
(defun wisent-python-lex-buffer ()
  "Lex the current buffer and display the resulting token stream.
`semantic-lex' is run over the whole buffer with depth 0; the tokens
are pretty-printed into the buffer \"*wisent-python-lexer*\", which
is then popped up."
  (interactive)
  (semantic-lex-init)
  (let ((tokens (semantic-lex (point-min) (point-max) 0))
        (out-buffer (get-buffer-create "*wisent-python-lexer*")))
    (with-current-buffer out-buffer
      (erase-buffer)
      (pp tokens out-buffer)
      (goto-char (point-min))
      (pop-to-buffer out-buffer))))
607 (provide 'semantic/wisent/python)
609 ;; Local variables:
610 ;; generated-autoload-file: "../loaddefs.el"
611 ;; generated-autoload-load-name: "semantic/wisent/python"
612 ;; End:
614 ;;; semantic/wisent/python.el ends here