1 html2text.php is a modified copy of a file shipped with the RoundCube project:
3 http://trac.roundcube.net/log/trunk/roundcubemail/program/lib/html2text.php
9 1- Fixed these warnings in cron:
11 "html_entity_decode bug - cannot yet handle MBCS in html_entity_decode()!"
15 $tl=textlib_get_instance();
16 $text = $tl->entities_to_utf8($text, true);
20 $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
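23 2- Fixed an error in preg_replace_callback on PHP 4: the callback is now bound to the current instance ($this) instead of being referenced by class name.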
25 --- a/lib/html2text.php
26 +++ b/lib/html2text.php
27 @@ -468,7 +468,7 @@ class html2text
29 // Run our defined search-and-replace
30 $text = preg_replace($this->search, $this->replace, $text);
31 - $text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);
32 + $text = preg_replace_callback($this->callback_search, array(&$this, '_preg_callback'), $text);
34 // Replace known html entities
35 $text = utf8_encode(html_entity_decode($text));
38 -- Francois Marier <francois@catalyst.net.nz> 2009-05-22
41 3- Don't just strip images; replace them with their alt text.
43 index b7e3e3e..96ef508 100644
44 --- a/lib/html2text.php
45 +++ b/lib/html2text.php
46 @@ -237,6 +237,7 @@ class html2text
47 '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
49 '/<(th)[^>]*>(.*?)<\/th>/i', // <th> and </th>
50 + '/<(img)[^>]*alt=\"([^>"]+)\"[^>]*>/i', // <img> with alt
54 @@ -574,6 +575,8 @@ class html2text
55 return $this->_strtoupper("\n\n". $matches[2] ."\n\n");
57 return $this->_build_link_list($matches[3], $matches[4]);
59 + return '[' . $matches[2] . ']';
63 -- Tim Hunt 2010-08-04
66 4- In the _strtoupper method, use textlib instead of the built-in string functions that break UTF-8.
68 Index: lib/html2text.php
69 --- lib/html2text.php 2 Sep 2010 12:49:29 -0000 1.16
70 +++ lib/html2text.php 2 Nov 2010 19:57:09 -0000
73 function _strtoupper($str)
75 - if (function_exists('mb_strtoupper'))
76 - return mb_strtoupper($str);
78 - return strtoupper($str);
79 + $tl = textlib_get_instance();
80 + return $tl->strtoupper($str);
84 -- Tim Hunt 2010-11-02