1 # Copyright (C) 2001-2008 by the Free Software Foundation, Inc.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 """Extract topics from the original mail message."""
27 import email
.Iterators
30 from zope
.interface
import implements
32 from mailman
.i18n
import _
33 from mailman
.interfaces
import IHandler
def process(mlist, msg, msgdata):
    """Tag `msg` with the names of the list topics it matches.

    The message's Subject: and Keywords: header values, plus whatever
    scanbody() extracts from the body, are searched with every topic regular
    expression configured on the list.  Nothing happens when topics are
    disabled for the list.

    :param mlist: the mailing list; `topics_enabled`,
        `topics_bodylines_limit` and `topics` are read from it.  Each entry
        of `topics` is a 4-tuple `(name, pattern, desc, emptyflag)`.
    :param msg: the message being processed.  When at least one topic
        matches, an X-Topics: header listing the matching names is added.
    :param msgdata: the message metadata dictionary.  When at least one topic
        matches, `msgdata['topichits']` is set to the list of matching names.
    :raises re.error: if a topic pattern is not a valid regular expression.
    """
    if not mlist.topics_enabled:
        return
    # Extract the Subject:, Keywords:, and possibly body text
    matchlines = [msg.get('subject', None), msg.get('keywords', None)]
    limit = mlist.topics_bodylines_limit
    if limit < 0:
        # A negative limit means scan all body lines
        matchlines.extend(scanbody(msg))
    elif limit > 0:
        # Scan just some of the body lines
        matchlines.extend(scanbody(msg, limit))
    # A limit of exactly zero means don't scan any body lines.
    #
    # Drop missing/empty values.  This must be a real list, not a lazy
    # filter() object, because it is walked once per topic below.
    matchlines = [line for line in matchlines if line]
    # For each regular expression in the topics list, see if any of the lines
    # of interest from the message match the regexp.  If so, the message gets
    # added to the specific topics bucket.  Hits are kept in topic order and
    # without duplicates so the generated header is deterministic.
    hits = []
    for name, pattern, desc, emptyflag in mlist.topics:
        # A multi-line pattern is treated as a set of alternatives.
        pattern = OR.join(pattern.splitlines())
        cre = re.compile(pattern, re.IGNORECASE)
        if name in hits:
            continue
        for line in matchlines:
            if cre.search(line):
                hits.append(name)
                break
    if hits:
        msgdata['topichits'] = list(hits)
        msg['X-Topics'] = NLTAB.join(hits)
def scanbody(msg, numlines=None):
    """Return Subject:/Keywords: values found in header-like body lines.

    :param msg: the message whose body should be scanned.
    :param numlines: the maximum number of non-blank body lines to look at;
        `None` means look at all of them.
    :return: a list with the values of every Subject: "header" followed by
        the values of every Keywords: "header" parsed from the collected
        body lines.  The list is empty when the message has no eligible
        text/plain content or no lines were collected.
    """
    # We only scan the body of the message if it is of MIME type text/plain,
    # or if the outer type is multipart/alternative and there is a text/plain
    # part.  Anything else, and the body is ignored for header-scan purposes.
    found = None
    if msg.get_content_type() == 'text/plain':
        found = msg
    elif (msg.is_multipart()
          and msg.get_content_type() == 'multipart/alternative'):
        for part in msg.get_payload():
            if part.get_content_type() == 'text/plain':
                found = part
                break
    # Compare with None explicitly: the length of a Message is its number of
    # headers, so a text/plain part without any headers would test as false.
    if found is None:
        return []
    # Now that we know the message meets our criteria, let's extract the
    # first numlines of body text.
    #
    # NOTE(review): the lines are read from the whole `msg`, not only from
    # the text/plain part located above -- confirm that this is intended.
    lines = []
    if numlines is None or numlines > 0:
        # Iterate lazily so that a small limit does not require reading the
        # entire body.
        for line in email.Iterators.body_line_iterator(msg):
            # Blank lines don't count
            if not line.strip():
                continue
            lines.append(line)
            if numlines is not None and len(lines) >= numlines:
                break
    if not lines:
        # Parsing an empty string would yield no headers anyway.
        return []
    # Concatenate those body text lines (each still carries its own line
    # ending), and then create a new message object from the result.
    parser = _ForgivingParser()
    headers = parser.parsestr(EMPTYSTRING.join(lines))
    return headers.get_all('subject', []) + headers.get_all('keywords', [])
class _ForgivingParser(email.Parser.HeaderParser):
    """Header parser that quietly stops at the first non-header line."""

    # Be a little more forgiving about non-header/continuation lines, since
    # we'll just read as much as we can from "header-like" lines in the body.
    #
    # BAW: WIBNI we didn't have to cut-n-paste this whole thing just to
    # specialize the way it returns?
    #
    # NOTE(review): this overrides a private hook of the base parser; confirm
    # that the email package in use still calls `_parseheaders`.
    def _parseheaders(self, container, fp):
        """Read header lines from `fp` and store them on `container`.

        Reading stops, without raising, at end of input, at an empty line,
        or at the first line that cannot be a header or a continuation.
        Headers read up to that point are kept.

        :param container: the message object that receives the headers (and
            the Unix From_ line, if it is the first line read).
        :param fp: a file-like object providing `readline()`.
        """
        lastheader = ''
        lastvalue = []
        lineno = 0
        while True:
            # Don't strip the line before we test for the end condition,
            # because whitespace-only header lines are RFC compliant
            # continuation lines.
            line = fp.readline()
            if not line:
                break
            # Ignore the trailing newline
            line = line.splitlines()[0]
            if not line:
                break
            lineno += 1
            # Check for initial Unix From_ line
            if line.startswith('From '):
                if lineno == 1:
                    container.set_unixfrom(line)
                    continue
                # A From_ line anywhere else ends the header block.
                break
            # Header continuation line
            if line[0] in ' \t':
                if not lastheader:
                    # Nothing to continue; give up on the rest.
                    break
                lastvalue.append(line)
                continue
            # Normal, non-continuation header.  BAW: this should check to make
            # sure it's a legal header, e.g. doesn't contain spaces.  Also, we
            # should expose the header matching algorithm in the API, and
            # allow for a non-strict parsing mode (that ignores the line
            # instead of raising the exception).
            i = line.find(':')
            if i < 0:
                # Not header-like: stop here instead of raising.
                break
            if lastheader:
                # Flush the previous header now that its value is complete.
                container[lastheader] = NLTAB.join(lastvalue)
            lastheader = line[:i]
            lastvalue = [line[i+1:].lstrip()]
        # Make sure we retain the last header
        if lastheader:
            container[lastheader] = NLTAB.join(lastvalue)
172 """Tag messages with topic matches."""
177 description
= _('Tag messages with topic matches.')
def process(self, mlist, msg, msgdata):
    """See `IHandler`.

    Forwards the mailing list, the message and its metadata dictionary,
    unchanged, to the module-level process() function.
    """
    # This is a method of the handler class; the enclosing `class` statement
    # is not part of this excerpt, so re-indent it under that class.
    process(mlist, msg, msgdata)