Fix bug #11073 with bidi-related crashes displaying some byte sequences.
[emacs.git] / lisp / thingatpt.el
blob1600785c1173d8f3c691f117fc8268d66e6c68e6
1 ;;; thingatpt.el --- get the `thing' at point
3 ;; Copyright (C) 1991-1998, 2000-2012 Free Software Foundation, Inc.
5 ;; Author: Mike Williams <mikew@gopher.dosli.govt.nz>
6 ;; Maintainer: FSF
7 ;; Keywords: extensions, matching, mouse
8 ;; Created: Thu Mar 28 13:48:23 1991
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; This file provides routines for getting the "thing" at the location of
28 ;; point, whatever that "thing" happens to be. The "thing" is defined by
29 ;; its beginning and end positions in the buffer.
31 ;; The function bounds-of-thing-at-point finds the beginning and end
32 ;; positions by moving first forward to the end of the "thing", and then
33 ;; backwards to the beginning. By default, it uses the corresponding
34 ;; forward-"thing" operator (eg. forward-word, forward-line).
36 ;; Special cases are allowed for using properties associated with the named
37 ;; "thing":
39 ;; forward-op Function to call to skip forward over a "thing" (or
40 ;; with a negative argument, backward).
42 ;; beginning-op Function to call to skip to the beginning of a "thing".
43 ;; end-op Function to call to skip to the end of a "thing".
45 ;; Reliance on existing operators means that many `things' can be accessed
46 ;; without further code: eg.
47 ;; (thing-at-point 'line)
48 ;; (thing-at-point 'page)
50 ;;; Code:
52 (provide 'thingatpt)
54 ;; Basic movement
56 ;;;###autoload
57 (defun forward-thing (thing &optional n)
58 "Move forward to the end of the Nth next THING.
59 THING should be a symbol specifying a type of syntactic entity.
60 Possibilities include `symbol', `list', `sexp', `defun',
61 `filename', `url', `email', `word', `sentence', `whitespace',
62 `line', and `page'."
63 (let ((forward-op (or (get thing 'forward-op)
64 (intern-soft (format "forward-%s" thing)))))
65 (if (functionp forward-op)
66 (funcall forward-op (or n 1))
67 (error "Can't determine how to move over a %s" thing))))
69 ;; General routines
71 ;;;###autoload
72 (defun bounds-of-thing-at-point (thing)
73 "Determine the start and end buffer locations for the THING at point.
74 THING should be a symbol specifying a type of syntactic entity.
75 Possibilities include `symbol', `list', `sexp', `defun',
76 `filename', `url', `email', `word', `sentence', `whitespace',
77 `line', and `page'.
79 See the file `thingatpt.el' for documentation on how to define a
80 valid THING.
82 Return a cons cell (START . END) giving the start and end
83 positions of the thing found."
84 (if (get thing 'bounds-of-thing-at-point)
85 (funcall (get thing 'bounds-of-thing-at-point))
86 (let ((orig (point)))
87 (condition-case nil
88 (save-excursion
89 ;; Try moving forward, then back.
90 (funcall ;; First move to end.
91 (or (get thing 'end-op)
92 (lambda () (forward-thing thing 1))))
93 (funcall ;; Then move to beg.
94 (or (get thing 'beginning-op)
95 (lambda () (forward-thing thing -1))))
96 (let ((beg (point)))
97 (if (<= beg orig)
98 ;; If that brings us all the way back to ORIG,
99 ;; it worked. But END may not be the real end.
100 ;; So find the real end that corresponds to BEG.
101 ;; FIXME: in which cases can `real-end' differ from `end'?
102 (let ((real-end
103 (progn
104 (funcall
105 (or (get thing 'end-op)
106 (lambda () (forward-thing thing 1))))
107 (point))))
108 (when (and (<= orig real-end) (< beg real-end))
109 (cons beg real-end)))
110 (goto-char orig)
111 ;; Try a second time, moving backward first and then forward,
112 ;; so that we can find a thing that ends at ORIG.
113 (funcall ;; First, move to beg.
114 (or (get thing 'beginning-op)
115 (lambda () (forward-thing thing -1))))
116 (funcall ;; Then move to end.
117 (or (get thing 'end-op)
118 (lambda () (forward-thing thing 1))))
119 (let ((end (point))
120 (real-beg
121 (progn
122 (funcall
123 (or (get thing 'beginning-op)
124 (lambda () (forward-thing thing -1))))
125 (point))))
126 (if (and (<= real-beg orig) (<= orig end) (< real-beg end))
127 (cons real-beg end))))))
128 (error nil)))))
130 ;;;###autoload
131 (defun thing-at-point (thing)
132 "Return the THING at point.
133 THING should be a symbol specifying a type of syntactic entity.
134 Possibilities include `symbol', `list', `sexp', `defun',
135 `filename', `url', `email', `word', `sentence', `whitespace',
136 `line', and `page'.
138 See the file `thingatpt.el' for documentation on how to define
139 a symbol as a valid THING."
140 (if (get thing 'thing-at-point)
141 (funcall (get thing 'thing-at-point))
142 (let ((bounds (bounds-of-thing-at-point thing)))
143 (if bounds
144 (buffer-substring (car bounds) (cdr bounds))))))
146 ;; Go to beginning/end
148 (defun beginning-of-thing (thing)
149 "Move point to the beginning of THING.
150 The bounds of THING are determined by `bounds-of-thing-at-point'."
151 (let ((bounds (bounds-of-thing-at-point thing)))
152 (or bounds (error "No %s here" thing))
153 (goto-char (car bounds))))
155 (defun end-of-thing (thing)
156 "Move point to the end of THING.
157 The bounds of THING are determined by `bounds-of-thing-at-point'."
158 (let ((bounds (bounds-of-thing-at-point thing)))
159 (or bounds (error "No %s here" thing))
160 (goto-char (cdr bounds))))
162 ;; Special cases
164 ;; Lines
166 ;; bolp will be false when you click on the last line in the buffer
167 ;; and it has no final newline.
169 (put 'line 'beginning-op
170 (lambda () (if (bolp) (forward-line -1) (beginning-of-line))))
172 ;; Sexps
174 (defun in-string-p ()
175 "Return non-nil if point is in a string.
176 \[This is an internal function.]"
177 (let ((orig (point)))
178 (save-excursion
179 (beginning-of-defun)
180 (nth 3 (parse-partial-sexp (point) orig)))))
182 (defun end-of-sexp ()
183 "Move point to the end of the current sexp.
184 \[This is an internal function.]"
185 (let ((char-syntax (char-syntax (char-after))))
186 (if (or (eq char-syntax ?\))
187 (and (eq char-syntax ?\") (in-string-p)))
188 (forward-char 1)
189 (forward-sexp 1))))
191 (put 'sexp 'end-op 'end-of-sexp)
193 (defun beginning-of-sexp ()
194 "Move point to the beginning of the current sexp.
195 \[This is an internal function.]"
196 (let ((char-syntax (char-syntax (char-before))))
197 (if (or (eq char-syntax ?\()
198 (and (eq char-syntax ?\") (in-string-p)))
199 (forward-char -1)
200 (forward-sexp -1))))
202 (put 'sexp 'beginning-op 'beginning-of-sexp)
204 ;; Lists
206 (put 'list 'bounds-of-thing-at-point 'thing-at-point-bounds-of-list-at-point)
208 (defun thing-at-point-bounds-of-list-at-point ()
209 "Return the bounds of the list at point.
210 \[Internal function used by `bounds-of-thing-at-point'.]"
211 (save-excursion
212 (let ((opoint (point))
213 (beg (condition-case nil
214 (progn (up-list -1)
215 (point))
216 (error nil))))
217 (condition-case nil
218 (if beg
219 (progn (forward-sexp)
220 (cons beg (point)))
221 ;; Are we are at the beginning of a top-level sexp?
222 (forward-sexp)
223 (let ((end (point)))
224 (backward-sexp)
225 (if (>= opoint (point))
226 (cons opoint end))))
227 (error nil)))))
229 ;; Defuns
231 (put 'defun 'beginning-op 'beginning-of-defun)
232 (put 'defun 'end-op 'end-of-defun)
233 (put 'defun 'forward-op 'end-of-defun)
235 ;; Filenames and URLs www.com/foo%32bar
237 (defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"
238 "Characters allowable in filenames.")
240 (put 'filename 'end-op
241 (lambda ()
242 (re-search-forward (concat "\\=[" thing-at-point-file-name-chars "]*")
243 nil t)))
244 (put 'filename 'beginning-op
245 (lambda ()
246 (if (re-search-backward (concat "[^" thing-at-point-file-name-chars "]")
247 nil t)
248 (forward-char)
249 (goto-char (point-min)))))
251 (defvar thing-at-point-url-path-regexp
252 "[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+"
253 "A regular expression probably matching the host and filename or e-mail part of a URL.")
255 (defvar thing-at-point-short-url-regexp
256 (concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp)
257 "A regular expression probably matching a URL without an access scheme.
258 Hostname matching is stricter in this case than for
259 ``thing-at-point-url-regexp''.")
261 (defvar thing-at-point-uri-schemes
262 ;; Officials from http://www.iana.org/assignments/uri-schemes.html
263 '("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:"
264 "telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:"
265 "cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:"
266 "tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:"
267 "modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:"
268 "afs:" "tn3270:" "mailserver:"
269 "crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:"
270 "iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:"
271 "tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:"
272 ;; Compatibility
273 "snews:" "irc:" "mms://" "mmsh://")
274 "Uniform Resource Identifier (URI) Schemes.")
276 (defvar thing-at-point-url-regexp
277 (concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)"
278 thing-at-point-url-path-regexp)
279 "A regular expression probably matching a complete URL.")
281 (defvar thing-at-point-markedup-url-regexp
282 "<URL:[^>]+>"
283 "A regular expression matching a URL marked up per RFC1738.
284 This may contain whitespace (including newlines) .")
286 (put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point)
287 (defun thing-at-point-bounds-of-url-at-point ()
288 (let ((strip (thing-at-point-looking-at
289 thing-at-point-markedup-url-regexp))) ;; (url "") short
290 (if (or strip
291 (thing-at-point-looking-at thing-at-point-url-regexp)
292 ;; Access scheme omitted?
293 ;; (setq short (thing-at-point-looking-at
294 ;; thing-at-point-short-url-regexp))
296 (let ((beginning (match-beginning 0))
297 (end (match-end 0)))
298 (when strip
299 (setq beginning (+ beginning 5))
300 (setq end (- end 1)))
301 (cons beginning end)))))
303 (put 'url 'thing-at-point 'thing-at-point-url-at-point)
304 (defun thing-at-point-url-at-point ()
305 "Return the URL around or before point.
307 Search backwards for the start of a URL ending at or after point. If
308 no URL found, return nil. The access scheme will be prepended if
309 absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it
310 starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default."
312 (let ((url "") short strip)
313 (if (or (setq strip (thing-at-point-looking-at
314 thing-at-point-markedup-url-regexp))
315 (thing-at-point-looking-at thing-at-point-url-regexp)
316 ;; Access scheme omitted?
317 (setq short (thing-at-point-looking-at
318 thing-at-point-short-url-regexp)))
319 (progn
320 (setq url (buffer-substring-no-properties (match-beginning 0)
321 (match-end 0)))
322 (and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">"
323 ;; strip whitespace
324 (while (string-match "[ \t\n\r]+" url)
325 (setq url (replace-match "" t t url)))
326 (and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url)
327 ;; already has a URL scheme.
329 ((string-match "@" url)
330 "mailto:")
331 ;; e.g. ftp.swiss... or ftp-swiss...
332 ((string-match "^ftp" url)
333 "ftp://")
334 (t "http://"))
335 url)))
336 (if (string-equal "" url)
338 url)))))
340 ;; The normal thingatpt mechanism doesn't work for complex regexps.
341 ;; This should work for almost any regexp wherever we are in the
342 ;; match. To do a perfect job for any arbitrary regexp would mean
343 ;; testing every position before point. Regexp searches won't find
344 ;; matches that straddle the start position so we search forwards once
345 ;; and then back repeatedly and then back up a char at a time.
347 (defun thing-at-point-looking-at (regexp)
348 "Return non-nil if point is in or just after a match for REGEXP.
349 Set the match data from the earliest such match ending at or after
350 point."
351 (save-excursion
352 (let ((old-point (point)) match)
353 (and (looking-at regexp)
354 (>= (match-end 0) old-point)
355 (setq match (point)))
356 ;; Search back repeatedly from end of next match.
357 ;; This may fail if next match ends before this match does.
358 (re-search-forward regexp nil 'limit)
359 (while (and (re-search-backward regexp nil t)
360 (or (> (match-beginning 0) old-point)
361 (and (looking-at regexp) ; Extend match-end past search start
362 (>= (match-end 0) old-point)
363 (setq match (point))))))
364 (if (not match) nil
365 (goto-char match)
366 ;; Back up a char at a time in case search skipped
367 ;; intermediate match straddling search start pos.
368 (while (and (not (bobp))
369 (progn (backward-char 1) (looking-at regexp))
370 (>= (match-end 0) old-point)
371 (setq match (point))))
372 (goto-char match)
373 (looking-at regexp)))))
375 (put 'url 'end-op
376 (lambda ()
377 (let ((bounds (thing-at-point-bounds-of-url-at-point)))
378 (if bounds
379 (goto-char (cdr bounds))
380 (error "No URL here")))))
381 (put 'url 'beginning-op
382 (lambda ()
383 (let ((bounds (thing-at-point-bounds-of-url-at-point)))
384 (if bounds
385 (goto-char (car bounds))
386 (error "No URL here")))))
388 ;; Email addresses
389 (defvar thing-at-point-email-regexp
390 "<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?"
391 "A regular expression probably matching an email address.
392 This does not match the real name portion, only the address, optionally
393 with angle brackets.")
395 ;; Haven't set 'forward-op on 'email nor defined 'forward-email' because
396 ;; not sure they're actually needed, and URL seems to skip them too.
397 ;; Note that (end-of-thing 'email) and (beginning-of-thing 'email)
398 ;; work automagically, though.
400 (put 'email 'bounds-of-thing-at-point
401 (lambda ()
402 (let ((thing (thing-at-point-looking-at thing-at-point-email-regexp)))
403 (if thing
404 (let ((beginning (match-beginning 0))
405 (end (match-end 0)))
406 (cons beginning end))))))
408 (put 'email 'thing-at-point
409 (lambda ()
410 (let ((boundary-pair (bounds-of-thing-at-point 'email)))
411 (if boundary-pair
412 (buffer-substring-no-properties
413 (car boundary-pair) (cdr boundary-pair))))))
415 ;; Whitespace
417 (defun forward-whitespace (arg)
418 "Move point to the end of the next sequence of whitespace chars.
419 Each such sequence may be a single newline, or a sequence of
420 consecutive space and/or tab characters.
421 With prefix argument ARG, do it ARG times if positive, or move
422 backwards ARG times if negative."
423 (interactive "p")
424 (if (natnump arg)
425 (re-search-forward "[ \t]+\\|\n" nil 'move arg)
426 (while (< arg 0)
427 (if (re-search-backward "[ \t]+\\|\n" nil 'move)
428 (or (eq (char-after (match-beginning 0)) ?\n)
429 (skip-chars-backward " \t")))
430 (setq arg (1+ arg)))))
432 ;; Buffer
434 (put 'buffer 'end-op (lambda () (goto-char (point-max))))
435 (put 'buffer 'beginning-op (lambda () (goto-char (point-min))))
437 ;; Symbols
439 (defun forward-symbol (arg)
440 "Move point to the next position that is the end of a symbol.
441 A symbol is any sequence of characters that are in either the
442 word constituent or symbol constituent syntax class.
443 With prefix argument ARG, do it ARG times if positive, or move
444 backwards ARG times if negative."
445 (interactive "p")
446 (if (natnump arg)
447 (re-search-forward "\\(\\sw\\|\\s_\\)+" nil 'move arg)
448 (while (< arg 0)
449 (if (re-search-backward "\\(\\sw\\|\\s_\\)+" nil 'move)
450 (skip-syntax-backward "w_"))
451 (setq arg (1+ arg)))))
453 ;; Syntax blocks
455 (defun forward-same-syntax (&optional arg)
456 "Move point past all characters with the same syntax class.
457 With prefix argument ARG, do it ARG times if positive, or move
458 backwards ARG times if negative."
459 (interactive "p")
460 (while (< arg 0)
461 (skip-syntax-backward
462 (char-to-string (char-syntax (char-before))))
463 (setq arg (1+ arg)))
464 (while (> arg 0)
465 (skip-syntax-forward (char-to-string (char-syntax (char-after))))
466 (setq arg (1- arg))))
468 ;; Aliases
470 (defun word-at-point ()
471 "Return the word at point. See `thing-at-point'."
472 (thing-at-point 'word))
474 (defun sentence-at-point ()
475 "Return the sentence at point. See `thing-at-point'."
476 (thing-at-point 'sentence))
478 (defun read-from-whole-string (str)
479 "Read a Lisp expression from STR.
480 Signal an error if the entire string was not used."
481 (let* ((read-data (read-from-string str))
482 (more-left
483 (condition-case nil
484 ;; The call to `ignore' suppresses a compiler warning.
485 (progn (ignore (read-from-string (substring str (cdr read-data))))
487 (end-of-file nil))))
488 (if more-left
489 (error "Can't read whole string")
490 (car read-data))))
492 (defun form-at-point (&optional thing pred)
493 (let ((sexp (condition-case nil
494 (read-from-whole-string (thing-at-point (or thing 'sexp)))
495 (error nil))))
496 (if (or (not pred) (funcall pred sexp)) sexp)))
498 ;;;###autoload
499 (defun sexp-at-point ()
500 "Return the sexp at point, or nil if none is found."
501 (form-at-point 'sexp))
502 ;;;###autoload
503 (defun symbol-at-point ()
504 "Return the symbol at point, or nil if none is found."
505 (let ((thing (thing-at-point 'symbol)))
506 (if thing (intern thing))))
507 ;;;###autoload
508 (defun number-at-point ()
509 "Return the number at point, or nil if none is found."
510 (form-at-point 'sexp 'numberp))
511 ;;;###autoload
512 (defun list-at-point ()
513 "Return the Lisp list at point, or nil if none is found."
514 (form-at-point 'list 'listp))
516 ;;; thingatpt.el ends here