Use external tool `w3m' for dumping html to plain text.
[xwl-elisp.git] / url-html.el
blobbabbf935e35a440db42e97015c64920b6c149cef
1 ;;; url-html.el --- Tools for dealing with html pages
3 ;; Copyright (C) 2008 William Xu
5 ;; Author: William Xu <william.xwl@gmail.com>
6 ;; Version: 0.1
8 ;; This program is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
13 ;; This program is distributed in the hope that it will be useful, but
14 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;; General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with this program; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
21 ;; Boston, MA 02110-1301, USA.
23 ;;; Code:
(defun url-html-decode-buffer (&optional buffer)
  "Decode the raw html text in BUFFER (default is the current buffer).
Usually used in a buffer retrieved by `url-retrieve'.

The coding system is taken from the first <meta ...> tag carrying a
charset= value, in either the http-equiv/content form or the short
<meta charset=...> form.  Tag and attribute names are matched
case-insensitively.  If no charset is declared, or the declared name
is not a coding system known to Emacs, utf-8 is used.

The buffer is switched to multibyte and its accessible portion is
decoded in place."
  (with-current-buffer (or buffer (current-buffer))
    (let ((coding 'utf-8)
          ;; HTML tag and attribute names are case-insensitive, so do not
          ;; depend on whatever `case-fold-search' happens to be here.
          (case-fold-search t))
      (when (save-excursion
              (goto-char (point-min))
              ;; `[^>]*' keeps the match inside a single meta tag (it may
              ;; span lines); the optional quote covers <meta charset="x">.
              (re-search-forward
               "<meta[^>]*charset=[[:blank:]]*[\"']?\\([a-zA-Z0-9_-]+\\)"
               nil t))
        (let ((declared (intern (downcase (match-string 1)))))
          ;; Only trust the page's charset when Emacs knows it; an unknown
          ;; name would make `decode-coding-region' signal an error.  The
          ;; explicit nil check is needed because (coding-system-p nil) is
          ;; t and a charset spelled "nil" interns to nil.
          (when (and declared (coding-system-p declared))
            (setq coding declared))))
      (set-buffer-multibyte t)
      (decode-coding-region (point-min) (point-max) coding))))
42 (provide 'url-html)
44 ;;; url-html.el ends here