From f63d76872c77b410f3c28afce5a2919dee2f9242 Mon Sep 17 00:00:00 2001 From: Nicolas Goaziou Date: Tue, 4 Aug 2015 16:40:25 +0200 Subject: [PATCH] Fix links with newline characters * lisp/org-element.el (org-element-link-parser): Correctly parse links with newline characters. * lisp/org.el (org-make-link-regexps): Allow newline characters within angle links. (org-activate-angle-links): Support multiline angle links. * testing/lisp/test-org-element.el (test-org-element/link-parser): Update tests. As a rule of thumb, any newline character and all surrounding whitespace are treated as a single space in a bracket link. They are removed in angle links. --- lisp/org-element.el | 70 ++++++++++++++++++++++++++-------------- lisp/org.el | 8 ++--- testing/lisp/test-org-element.el | 15 ++++++--- 3 files changed, 58 insertions(+), 35 deletions(-) diff --git a/lisp/org-element.el b/lisp/org-element.el index aa6e8d0d3..c7e76e860 100644 --- a/lisp/org-element.el +++ b/lisp/org-element.el @@ -3045,53 +3045,73 @@ Assume point is at the beginning of the link." contents-end (match-end 1))) ;; Type 2: Standard link, i.e. [[http://orgmode.org][homepage]] ((looking-at org-bracket-link-regexp) - (setq contents-begin (match-beginning 3) - contents-end (match-end 3) - link-end (match-end 0) - ;; RAW-LINK is the original link. Expand any - ;; abbreviation in it. - raw-link (org-translate-link + (setq contents-begin (match-beginning 3)) + (setq contents-end (match-end 3)) + (setq link-end (match-end 0)) + ;; RAW-LINK is the original link. Expand any + ;; abbreviation in it. + ;; + ;; Also treat any newline character and associated + ;; indentation as a single space character. This is not + ;; compatible with RFC 3986, which requires ignoring + ;; them altogether. However, doing so would require + ;; users to encode spaces on the fly when writing links + ;; (e.g., insert [[shell:ls%20*.org]] instead of + ;; [[shell:ls *.org]]), which defeats Org's focus on + ;; simplicity. 
+ (setq raw-link (org-translate-link (org-link-expand-abbrev - (org-match-string-no-properties 1)))) - ;; Determine TYPE of link and set PATH accordingly. + (replace-regexp-in-string + "[ \t]*\n[ \t]*" " " + (org-match-string-no-properties 1))))) + ;; Determine TYPE of link and set PATH accordingly. Newline + ;; characters and surrounding indentation were already collapsed + ;; into a single space in RAW-LINK, whatever the link type. (cond ;; File type. ((or (file-name-absolute-p raw-link) (string-match "\\`\\.\\.?/" raw-link)) - (setq type "file" path raw-link)) + (setq type "file") + (setq path raw-link)) ;; Explicit type (http, irc, bbdb...). See `org-link-types'. ((string-match org-link-types-re raw-link) - (setq type (match-string 1 raw-link) - ;; According to RFC 3986, extra whitespace should be - ;; ignored when a URI is extracted. - path (replace-regexp-in-string - "[ \t]*\n[ \t]*" "" (substring raw-link (match-end 0))))) + (setq type (match-string 1 raw-link)) + (setq path (substring raw-link (match-end 0)))) ;; Id type: PATH is the id. - ((string-match "\\`id:\\([-a-f0-9]+\\)" raw-link) + ((string-match "\\`id:\\([-a-f0-9]+\\)\\'" raw-link) (setq type "id" path (match-string 1 raw-link))) ;; Code-ref type: PATH is the name of the reference. - ((string-match "\\`(\\(.*\\))\\'" raw-link) - (setq type "coderef" path (match-string 1 raw-link))) + ((and (org-string-match-p "\\`(" raw-link) + (org-string-match-p ")\\'" raw-link)) + (setq type "coderef") + (setq path (substring raw-link 1 -1))) ;; Custom-id type: PATH is the name of the custom id. ((= (string-to-char raw-link) ?#) - (setq type "custom-id" path (substring raw-link 1))) + (setq type "custom-id") + (setq path (substring raw-link 1))) ;; Fuzzy type: Internal link either matches a target, an ;; headline name or nothing. PATH is the target or ;; headline's name. 
- (t (setq type "fuzzy" path raw-link)))) + (t + (setq type "fuzzy") + (setq path raw-link)))) ;; Type 3: Plain link, e.g., http://orgmode.org ((looking-at org-plain-link-re) (setq raw-link (org-match-string-no-properties 0) type (org-match-string-no-properties 1) link-end (match-end 0) path (org-match-string-no-properties 2))) - ;; Type 4: Angular link, e.g., <http://orgmode.org> + ;; Type 4: Angular link, e.g., <http://orgmode.org>. Unlike + ;; bracket links, follow RFC 3986 and remove any extra + ;; whitespace in URI. ((looking-at org-angle-link-re) - (setq raw-link (buffer-substring-no-properties - (match-beginning 1) (match-end 2)) - type (org-match-string-no-properties 1) - link-end (match-end 0) - path (org-match-string-no-properties 2))) + (setq type (org-match-string-no-properties 1)) + (setq link-end (match-end 0)) + (setq raw-link + (buffer-substring-no-properties + (match-beginning 1) (match-end 2))) + (setq path (replace-regexp-in-string + "[ \t]*\n[ \t]*" "" (org-match-string-no-properties 2)))) (t (throw 'no-object nil))) ;; In any case, deduce end point after trailing white space from ;; LINK-END variable. diff --git a/lisp/org.el b/lisp/org.el index 886608b0c..484f3ffa2 100755 --- a/lisp/org.el +++ b/lisp/org.el @@ -5712,10 +5712,7 @@ This should be called after the variable `org-link-types' has changed." "\\([^" org-non-link-chars " ]" "[^\t\n\r]*\\)") org-angle-link-re - (concat "<" types-re ":" - "\\([^" org-non-link-chars " ]" - "[^" org-non-link-chars "]*" - "\\)>") + (format "<%s:\\(\n?\\(?:[^>\n]+\n?\\)*\\)>" types-re) org-plain-link-re (concat "\\<" types-re ":" @@ -5998,7 +5995,8 @@ by a #." 
(org-remove-flyspell-overlays-in (match-beginning 0) (match-end 0)) (add-text-properties (match-beginning 0) (match-end 0) (list 'mouse-face 'highlight - 'keymap org-mouse-map)) + 'keymap org-mouse-map + 'font-lock-multiline t)) (org-rear-nonsticky-at (match-end 0)) t))) diff --git a/testing/lisp/test-org-element.el b/testing/lisp/test-org-element.el index 9d9ac86e7..f55c3eeec 100644 --- a/testing/lisp/test-org-element.el +++ b/testing/lisp/test-org-element.el @@ -1628,17 +1628,22 @@ e^{i\\pi}+1=0 (equal (org-element-property :path (org-element-context)) file)))) ;; ... multi-line link. (should - (equal "//orgmode.org" - (org-test-with-temp-text "[[http://orgmode.\norg]]" + (equal "ls *.org" + (org-test-with-temp-text "[[shell:ls\n*.org]]" (org-element-property :path (org-element-context))))) ;; Plain link. (should (org-test-with-temp-text "A link: http://orgmode.org" (org-element-map (org-element-parse-buffer) 'link 'identity))) - ;; Angular link. + ;; Angular link. Follow RFC 3986. (should - (org-test-with-temp-text "A link: " - (org-element-map (org-element-parse-buffer) 'link 'identity nil t))) + (eq 'link + (org-test-with-temp-text "A link: " + (org-element-type (org-element-context))))) + (should + (equal "//orgmode.org" + (org-test-with-temp-text "A link: " + (org-element-property :path (org-element-context))))) ;; Link abbreviation. (should (equal "http" -- 2.11.4.GIT