Port header matching previously described by the misnamed KNONW_SPAMMERS
[mailman.git] / Mailman / Message.py
bloba9256dc909082f4c94a1c5a7e6485241288b274b
1 # Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16 # USA.
18 """Standard Mailman message object.
20 This is a subclass of email.message.Message but provides a slightly extended interface
21 which is more convenient for use inside Mailman.
22 """
24 import re
25 import email
26 import email.message
27 import email.utils
29 from email.charset import Charset
30 from email.header import Header
32 from Mailman import Utils
33 from Mailman.configuration import config
# Separator used when joining several addresses into one header value.
COMMASPACE = ', '

# Version of the installed email package as a tuple of ints, e.g. (4, 0, 3).
# Message stamps this on every instance and Message.__setstate__() compares
# it with the version stored in a pickle to decide whether schema fix-ups are
# needed.
#
# The pattern only matches dot-separated runs of digits, so every component
# is guaranteed to convert with int().  If the package exposes no
# __version__, or it does not start with a number, fall back to (0, 0, 0)
# instead of failing at import time; that is the same value __setstate__()
# assumes for pickles that carry no version at all.
mo = re.match(r'\d+(?:\.\d+)*', getattr(email, '__version__', ''))
VERSION = tuple(int(s) for s in mo.group().split('.')) if mo else (0, 0, 0)
class Message(email.message.Message):
    """email.message.Message with a few Mailman conveniences.

    Header values returned by __getitem__(), get() and get_all() are decoded
    from ASCII byte strings to unicode, pickles written with older versions
    of the email package are upgraded on load, and helpers are provided for
    working out who sent the message.
    """

    def __init__(self):
        # We need a version number so that we can optimize __setstate__()
        self.__version__ = VERSION
        email.message.Message.__init__(self)

    def __getitem__(self, key):
        """Return the header `key', decoding byte strings as ASCII.

        Non-string values (None for a missing header, Header instances) are
        returned untouched.  A byte string containing non-ASCII data makes
        the decoding raise UnicodeDecodeError.
        """
        value = email.message.Message.__getitem__(self, key)
        if isinstance(value, str):
            return unicode(value, 'ascii')
        return value

    def get(self, name, failobj=None):
        """Like email.message.Message.get(), decoding byte strings as ASCII.

        Note that `failobj' itself is decoded too when it is a byte string.
        """
        value = email.message.Message.get(self, name, failobj)
        if isinstance(value, str):
            return unicode(value, 'ascii')
        return value

    def get_all(self, name, failobj=None):
        """Return every value of header `name', decoding byte strings as ASCII.

        `failobj' is returned as-is (never decoded) when the header is absent.
        """
        # A private sentinel lets us tell "header missing" apart from any
        # value the caller might pass as failobj.
        missing = object()
        all_values = email.message.Message.get_all(self, name, missing)
        if all_values is missing:
            return failobj
        return [(unicode(value, 'ascii') if isinstance(value, str) else value)
                for value in all_values]

    # BAW: For debugging w/ bin/dumpdb.  Apparently pprint uses repr.
    def __repr__(self):
        return self.__str__()

    def __setstate__(self, d):
        """Restore pickled state `d', upgrading old email schemas in place."""
        # The base class attributes have changed over time.  Which could
        # affect Mailman if messages are sitting in the queue at the time of
        # upgrading the email package.  We shouldn't burden email with this,
        # so we handle schema updates here.
        self.__dict__ = d
        # We know that email 2.4.3 is up-to-date
        version = d.get('__version__', (0, 0, 0))
        d['__version__'] = VERSION
        if version >= VERSION:
            return
        # Messages grew a _charset attribute between email version 0.97 and 1.1
        if '_charset' not in d:
            self._charset = None
        # Messages grew a _default_type attribute between v2.1 and v2.2
        if '_default_type' not in d:
            # We really have no idea whether this message object is contained
            # inside a multipart/digest or not, so I think this is the best we
            # can do.
            self._default_type = 'text/plain'
        # Header instances used to allow both strings and Charsets in their
        # _chunks, but by email 2.4.3 now it's just Charsets.
        headers = []
        hchanged = False
        for k, v in self._headers:
            if isinstance(v, Header):
                chunks = []
                cchanged = False
                for s, charset in v._chunks:
                    if isinstance(charset, str):
                        charset = Charset(charset)
                        cchanged = True
                    chunks.append((s, charset))
                if cchanged:
                    v._chunks = chunks
                    hchanged = True
            headers.append((k, v))
        # Only rebind the header list when something was actually converted.
        if hchanged:
            self._headers = headers

    # I think this method ought to eventually be deprecated
    def get_sender(self, use_envelope=None, preserve_case=0):
        """Return the address considered to be the author of the email.

        This can return either the From: header, the Sender: header or the
        envelope header (a.k.a. the unixfrom header).  The first non-empty
        header value found is returned.  However the search order is
        determined by the following:

        - If config.USE_ENVELOPE_SENDER is true, then the search order is
          Sender:, From:, unixfrom

        - Otherwise, the search order is From:, Sender:, unixfrom

        The optional argument use_envelope, if given overrides the
        config.USE_ENVELOPE_SENDER setting.  It should be set to either 0 or 1
        (don't use None since that indicates no-override).

        unixfrom should never be empty.  If it is missing, or has no second
        field, the empty string is returned.  The return address is always
        lowercased, unless preserve_case is true.

        This method differs from get_senders() in that it returns one and only
        one address, and uses a different search order.
        """
        # Only consult the site configuration when the caller did not
        # override it.
        if use_envelope is None:
            senderfirst = config.USE_ENVELOPE_SENDER
        else:
            senderfirst = use_envelope
        if senderfirst:
            headers = ('sender', 'from')
        else:
            headers = ('from', 'sender')
        for h in headers:
            # Use only the first occurrence of Sender: or From:, although it's
            # not likely there will be more than one.
            fieldval = self[h]
            if not fieldval:
                continue
            addrs = email.utils.getaddresses([fieldval])
            if not addrs:
                continue
            address = addrs[0][1]
            if address:
                break
        else:
            # We didn't find a non-empty header, so let's fall back to the
            # unixfrom address.  This should never be empty, but if it ever
            # is, it's probably a Really Bad Thing.  Further, we just assume
            # that if the unixfrom exists, the second field is the address.
            unixfrom = self.get_unixfrom()
            fields = unixfrom.split() if unixfrom else []
            # A unixfrom line with fewer than two fields is badly formatted;
            # treat it like a missing one instead of raising IndexError, the
            # same way get_senders() ignores such lines.
            # TBD: now what?!  (when there is no usable address at all)
            address = fields[1] if len(fields) > 1 else ''
        if not preserve_case:
            return address.lower()
        return address

    def get_senders(self, preserve_case=0, headers=None):
        """Return a list of addresses representing the author of the email.

        The headers are searched in the order given by `headers', or by
        config.SENDER_HEADERS when `headers' is None.  The list will contain
        the following addresses (in order) depending on availability:

        1. From:
        2. unixfrom
        3. Reply-To:
        4. Sender:

        (NOTE(review): that default order is what this docstring has always
        stated; it is really whatever config.SENDER_HEADERS says.)

        The return addresses are always lower cased, unless `preserve_case' is
        true.  Optional `headers' gives an alternative search order, with None
        meaning, search the unixfrom header.  Items in `headers' are field
        names without the trailing colon.
        """
        if headers is None:
            headers = config.SENDER_HEADERS
        pairs = []
        for h in headers:
            if h is None:
                # get_unixfrom() returns None if there's no envelope
                fieldval = self.get_unixfrom() or ''
                try:
                    pairs.append(('', fieldval.split()[1]))
                except IndexError:
                    # Ignore badly formatted unixfroms
                    pass
            else:
                fieldvals = self.get_all(h)
                if fieldvals:
                    pairs.extend(email.utils.getaddresses(fieldvals))
        authors = []
        for pair in pairs:
            address = pair[1]
            if address is not None and not preserve_case:
                address = address.lower()
            authors.append(address)
        return authors

    def get_filename(self, failobj=None):
        """Return the attachment filename, or `failobj' if it is unusable.

        Some MUA have bugs in RFC2231 filename encoding and cause
        Mailman to stop delivery in Scrubber.py (called from ToDigest.py).
        Decoding errors are therefore swallowed here and `failobj' is
        returned instead.
        """
        try:
            return email.message.Message.get_filename(self, failobj)
        except (UnicodeError, LookupError, ValueError):
            return failobj
class UserNotification(Message):
    """A message that Mailman itself composes and sends to some addresses."""

    def __init__(self, recip, sender, subject=None, text=None, lang=None):
        """Build the notification.

        `recip' is either one address or a list of addresses; the list of
        recipients ends up in self.recips and its printable form in To:.
        `sender' becomes the From: header.  `subject' defaults to
        '(no subject)'.  `text', when given, becomes the payload.  `lang'
        selects the charset through Utils.GetCharSet(); without it
        us-ascii is used.
        """
        Message.__init__(self)
        if lang is None:
            charset = 'us-ascii'
        else:
            charset = Utils.GetCharSet(lang)
        if text is not None:
            self.set_payload(text.encode(charset), charset)
        if subject is None:
            subject = '(no subject)'
        encoded_subject = subject.encode(charset)
        self['Subject'] = Header(encoded_subject, charset,
                                 header_name='Subject', errors='replace')
        self['From'] = sender
        # A list of recipients is shown comma-separated in To:, a single
        # recipient is used verbatim and wrapped in a one-element list.
        if not isinstance(recip, list):
            self['To'] = recip
            self.recips = [recip]
        else:
            self['To'] = COMMASPACE.join(recip)
            self.recips = recip

    def send(self, mlist, **_kws):
        """Sends the message by enqueuing it to the 'virgin' queue.

        This is used for all internally crafted messages.  Extra keyword
        arguments are passed along as queue metadata.
        """
        # The message is built from scratch, so give it a Message-ID
        # ourselves.  The MTA would add one anyway, but having it early is
        # useful for logging to logs/smtp.
        if 'message-id' not in self:
            self['Message-ID'] = email.utils.make_msgid()
        # Same reasoning for Date:, which RFC 2822 requires.
        if 'date' not in self:
            self['Date'] = email.utils.formatdate(localtime=True)
        # These are typically admin messages rather than list traffic, so
        # mark them Precedence: bulk unless a Precedence: header is already
        # present.
        if 'precedence' not in self:
            self['Precedence'] = 'bulk'
        self._enqueue(mlist, **_kws)

    def _enqueue(self, mlist, **_kws):
        """Put the message into the virgin queue with its delivery metadata."""
        # Not imported at module scope to avoid import loop
        from Mailman.queue import Switchboard
        switchboard = Switchboard(config.VIRGINQUEUE_DIR)
        # The message metadata better have a 'recip' attribute.
        metadata = {
            'recips': self.recips,
            'nodecorate': True,
            'reduced_list_headers': True,
            }
        if mlist is not None:
            metadata['listname'] = mlist.fqdn_listname
        # Caller-supplied metadata wins over the defaults above.
        metadata.update(_kws)
        switchboard.enqueue(self, **metadata)
class OwnerNotification(UserNotification):
    """A user notification addressed to the people running a mailing list."""

    def __init__(self, mlist, subject=None, text=None, tomoderators=True):
        """Build a notification for `mlist'.

        Recipients are the addresses of mlist.moderators when `tomoderators'
        is true, otherwise those of mlist.owners.  The message is sent from
        config.SITE_OWNER_ADDRESS in the list's preferred language.
        """
        roster = mlist.moderators if tomoderators else mlist.owners
        recips = [entry.address for entry in roster.addresses]
        sender = config.SITE_OWNER_ADDRESS
        UserNotification.__init__(self, recips, sender, subject, text,
                                  mlist.preferred_language)
        # Hack the To header to look like it's going to the -owner address
        del self['to']
        self['To'] = mlist.owner_address
        # Remembered so that _enqueue() can use it as the envelope sender.
        self._sender = sender

    def _enqueue(self, mlist, **_kws):
        """Queue the message in the virgin queue with the site owner as
        envelope sender."""
        # Not imported at module scope to avoid import loop
        from Mailman.queue import Switchboard
        switchboard = Switchboard(config.VIRGINQUEUE_DIR)
        # The message metadata better have a `recip' attribute
        switchboard.enqueue(
            self,
            listname=mlist.fqdn_listname,
            recips=self.recips,
            nodecorate=True,
            reduced_list_headers=True,
            envsender=self._sender,
            **_kws)