Infrastructure for testing the Connection class, and for counting the number
[mailman.git] / src / mailman / mta / smtp_direct.py
blob419d4ce9660881716f40d5d061a0b4bd111ad9c2
1 # Copyright (C) 1998-2009 by the Free Software Foundation, Inc.
3 # This file is part of GNU Mailman.
5 # GNU Mailman is free software: you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option)
8 # any later version.
10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 # more details.
15 # You should have received a copy of the GNU General Public License along with
16 # GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
18 """Local SMTP direct drop-off.
20 This module delivers messages via SMTP to a locally specified daemon. This
21 should be compatible with any modern SMTP server. It is expected that the MTA
22 handles all final delivery. We have to play tricks so that the list object
23 isn't locked while delivery occurs synchronously.
25 Note: This file only handles single threaded delivery. See SMTPThreaded.py
26 for a threaded implementation.
27 """
29 from __future__ import absolute_import, unicode_literals
31 __metaclass__ = type
32 __all__ = [
33 'SMTPDirect',
37 import copy
38 import time
39 import socket
40 import logging
41 import smtplib
43 from email.Charset import Charset
44 from email.Header import Header
45 from email.Utils import formataddr
46 from zope.interface import implements
48 from mailman.config import config
49 from mailman.core import errors
50 from mailman.email.utils import split_email
51 from mailman.i18n import _
52 from mailman.interfaces.handler import IHandler
53 from mailman.interfaces.mailinglist import Personalization
54 from mailman.utilities.string import expand
# Separator used to re-join the domain parts returned by split_email().
DOT = '.'
# Separator used when listing explicit recipients in the SMTP log.
COMMA = ','
# Logger that receives all SMTP delivery messages from this module.
log = logging.getLogger('mailman.smtp')
def process(mlist, msg, msgdata):
    """Deliver `msg` over SMTP to every address in msgdata['recips'].

    The recipients are split into chunks and each chunk is handed to either
    verpdeliver() (one recipient per chunk, when VERPing or personalizing)
    or bulkdeliver() (chunks built by chunkify(), or one single chunk when
    config.mta.max_recipients is not positive).

    :param mlist: The mailing list the message is being delivered for.  A
        false value makes the envelope sender fall back to the site owner.
    :param msg: The message to deliver.
    :param msgdata: The message metadata dictionary.  Keys read here are
        'recips', 'envsender', 'personalize', 'verp', 'undelivered',
        'decorated', 'original_sender' and 'tolist'.  'personalize',
        'decorated' and 'undelivered' may be written; 'recips' is changed
        during delivery and restored before returning.
    :raises errors.SomeRecipientsFailed: when at least one recipient was
        refused; it carries the temporary and the permanent failures.
    """
    recips = msgdata.get('recips')
    if not recips:
        # Nobody to deliver to!
        return
    # Calculate the non-VERP envelope sender.
    envsender = msgdata.get('envsender')
    if envsender is None:
        if mlist:
            envsender = mlist.bounces_address
        else:
            envsender = config.mailman.site_owner
    # Time to split up the recipient list.  If we're personalizing or VERPing
    # then each chunk will have exactly one recipient.  We'll then hand craft
    # an envelope sender and stitch a message together in memory for each one
    # separately.  If we're not VERPing, then we'll chunkify based on the
    # configured maximum number of recipients.  Note that most MTAs have a
    # limit on the number of recipients they'll swallow in a single
    # transaction.
    deliveryfunc = None
    if ('personalize' not in msgdata or msgdata['personalize']) and (
            msgdata.get('verp') or
            mlist.personalize != Personalization.none):
        chunks = [[recip] for recip in recips]
        msgdata['personalize'] = True
        deliveryfunc = verpdeliver
    else:
        # Only read the limit when it is actually needed, and only once.
        max_recipients = int(config.mta.max_recipients)
        if max_recipients <= 0:
            chunks = [recips]
        else:
            chunks = chunkify(recips, max_recipients)
    # See if this is an unshunted message for which some were undelivered.
    if 'undelivered' in msgdata:
        chunks = msgdata['undelivered']
    # If we're doing bulk delivery, then we can stitch up the message now.
    if deliveryfunc is None:
        # Be sure never to decorate the message more than once!
        if not msgdata.get('decorated'):
            handler = config.handlers['decorate']
            handler.process(mlist, msg, msgdata)
            msgdata['decorated'] = True
        deliveryfunc = bulkdeliver
    refused = {}
    t0 = time.time()
    origrecips = msgdata['recips']
    # MAS: get the message sender now for logging.  If we're using 'sender'
    # and not 'from', bulkdeliver changes it for bounce processing.  If we're
    # VERPing, it doesn't matter because bulkdeliver is working on a copy, but
    # otherwise msg gets changed.  If the list is anonymous, the original
    # sender is long gone, but Cleanse.py has logged it.
    origsender = msgdata.get('original_sender', msg.sender)
    # `undelivered' is the very list of chunks that we pop from to do
    # deliveries.  This seems like a good tradeoff between robustness and
    # resource utilization.  If delivery really fails (i.e. qfiles/shunt type
    # failures), then we'll pick up where we left off with `undelivered'.
    # This means at worst, the last chunk for which delivery was attempted
    # could get duplicates but not every one, and no recips should miss the
    # message.
    #
    # Open the initial connection.
    # NOTE(review): `Connection` is not imported in the visible part of this
    # module -- confirm that it is in scope.
    conn = Connection()
    try:
        msgdata['undelivered'] = chunks
        while chunks:
            chunk = chunks.pop()
            msgdata['recips'] = chunk
            try:
                deliveryfunc(mlist, msg, msgdata, envsender, refused, conn)
            except Exception:
                # If /anything/ goes wrong, push the last chunk back on the
                # undelivered list and re-raise the exception.  We don't know
                # how many of the last chunk might receive the message, so at
                # worst, everyone in this chunk will get a duplicate.  Sigh.
                chunks.append(chunk)
                raise
        del msgdata['undelivered']
    finally:
        # Restore the caller's recipient list even when closing the
        # connection itself fails.
        try:
            conn.quit()
        finally:
            msgdata['recips'] = origrecips
    # Log the post.
    t1 = time.time()
    substitutions = dict(
        msgid=msg.get('message-id', 'n/a'),
        listname=mlist.fqdn_listname,
        sender=origsender,
        recip=len(recips),
        size=msg.original_size,
        time=t1 - t0,
        refused=len(refused),
        smtpcode='n/a',
        smtpmsg='n/a',
        )
    # Log this message.  A template of 'no' disables that log entry.
    template = config.logging.smtp.every
    if template != 'no':
        log.info('%s', expand(template, substitutions))
    if refused:
        template = config.logging.smtp.refused
        if template != 'no':
            log.info('%s', expand(template, substitutions))
    else:
        # Log the successful post, but if it was not destined to the mailing
        # list (e.g. to the owner or admin), print the actual recipients
        # instead of just the number.
        if not msgdata.get('tolist'):
            recips = msg.get_all('to', [])
            recips.extend(msg.get_all('cc', []))
            # NOTE(review): this is stored under 'recips' while the count
            # above lives under 'recip' -- confirm which key the success
            # template expects.
            substitutions['recips'] = COMMA.join(recips)
        template = config.logging.smtp.success
        if template != 'no':
            log.info('%s', expand(template, substitutions))
    # Process any failed deliveries.
    tempfailures = []
    permfailures = []
    for recip, (code, smtpmsg) in refused.items():
        # DRUMS is an internet draft, but it says:
        #
        #    [RFC-821] incorrectly listed the error where an SMTP server
        #    exhausts its implementation limit on the number of RCPT commands
        #    ("too many recipients") as having reply code 552.  The correct
        #    reply code for this condition is 452.  Clients SHOULD treat a 552
        #    code in this case as a temporary, rather than permanent failure
        #    so the logic below works.
        #
        if code >= 500 and code != 552:
            # A permanent failure.
            permfailures.append(recip)
        else:
            # Deal with persistent transient failures by queuing them up for
            # future delivery.  TBD: this could generate lots of log entries!
            tempfailures.append(recip)
        template = config.logging.smtp.failure
        if template != 'no':
            substitutions.update(
                recip=recip,
                smtpcode=code,
                smtpmsg=smtpmsg,
                )
            log.info('%s', expand(template, substitutions))
    # Return the results.
    if tempfailures or permfailures:
        raise errors.SomeRecipientsFailed(tempfailures, permfailures)
def chunkify(recips, chunksize):
    """Split `recips` into chunks of at most `chunksize` addresses.

    Addresses are first grouped into buckets by top level domain, and a
    chunk never mixes addresses from two different buckets.  The top level
    domain is compared case-insensitively, and buckets are emitted in
    ascending bucket-number order so the result is deterministic.

    :param recips: An iterable of recipient address strings.
    :param chunksize: The maximum number of addresses per chunk.  A value
        below 1 is treated as 1, i.e. one recipient per chunk.
    :return: A list of lists of addresses; empty when `recips` is empty.
    """
    # First do a simple sort on top level domain.  It probably doesn't buy us
    # much to try to sort on MX record -- that's the MTA's job.  We're just
    # trying to avoid getting a max recips error.  Split the chunks along
    # these lines (as suggested originally by Chuq Von Rospach and slightly
    # elaborated by BAW).
    chunkmap = {
        'com': 1,
        'net': 2,
        'org': 2,
        'edu': 3,
        'us': 3,
        'ca': 3,
        }
    buckets = {}
    for recip in recips:
        dot = recip.rfind('.')
        # Domain names are case-insensitive, so normalize before the lookup.
        # Addresses without any dot land in the catch-all bucket 0.
        tld = recip[dot + 1:].lower() if dot >= 0 else None
        buckets.setdefault(chunkmap.get(tld, 0), []).append(recip)
    # Now fill the chunks, one bucket at a time.
    step = max(chunksize, 1)
    chunks = []
    for key in sorted(buckets):
        bucket = buckets[key]
        for start in range(0, len(bucket), step):
            chunks.append(bucket[start:start + step])
    return chunks
def verpdeliver(mlist, msg, msgdata, envsender, failures, conn):
    """Deliver an individually decorated copy of `msg` to each recipient.

    For every address in msgdata['recips'] a deep copy of the message is
    decorated, optionally given a VERP envelope sender and a personalized
    To: header, and then passed to bulkdeliver() as a delivery to a single
    recipient.

    :param mlist: The mailing list; its personalization setting and member
        data are consulted.
    :param msg: The message to deliver.  It is copied, never modified.
    :param msgdata: The message metadata dictionary.  'recips', 'verp' and
        'add-dup-header' are read; 'recips' is overwritten with the
        one-element list of the recipient currently being handled.
    :param envsender: The non-VERP envelope sender.
    :param failures: Dictionary of refused recipients, passed through to
        bulkdeliver().
    :param conn: The connection object, passed through to bulkdeliver().
    """
    handler = config.handlers['decorate']
    for recip in msgdata['recips']:
        # We now need to stitch together the message with its header and
        # footer.  If we're VERPing, we have to calculate the envelope sender
        # for each recipient.  Note that the list of recipients must be of
        # length 1.
        #
        # BAW: ezmlm includes the message number in the envelope, used when
        # sending a notification to the user telling her how many messages
        # they missed due to bouncing.  Neat idea.
        msgdata['recips'] = [recip]
        # Make a copy of the message, then decorate and deliver that.
        msgcopy = copy.deepcopy(msg)
        handler.process(mlist, msgcopy, msgdata)
        # Calculate the envelope sender, which we may be VERPing.  Always
        # start from the caller's envelope sender, so that one recipient's
        # VERP address is never used as the basis for the next recipient's.
        recip_envsender = envsender
        if msgdata.get('verp'):
            bmailbox, bdomain = split_email(envsender)
            rmailbox, rdomain = split_email(recip)
            if rdomain is None:
                # The recipient address is not fully-qualified.  We can't
                # deliver it to this person, nor can we craft a valid verp
                # header.  I don't think there's much we can do except ignore
                # this recipient.
                log.info('Skipping VERP delivery to unqual recip: %s', recip)
                continue
            recip_envsender = expand(config.mta.verp_format, dict(
                bounces=bmailbox, mailbox=rmailbox,
                host=DOT.join(rdomain))) + '@' + DOT.join(bdomain)
        if mlist.personalize == Personalization.full:
            # When fully personalizing, we want the To address to point to the
            # recipient, not to the mailing list.
            del msgcopy['to']
            name = None
            if mlist.isMember(recip):
                name = mlist.getMemberName(recip)
            if name:
                # Convert the name to an email-safe representation.  If the
                # name is a byte string, convert it first to Unicode, given
                # the character set of the member's language, replacing bad
                # characters for which we can do nothing about.  Once we have
                # the name as Unicode, we can create a Header instance for it
                # so that it's properly encoded for email transport.
                charset = mlist.getMemberLanguage(recip).charset
                if charset == 'us-ascii':
                    # Since Header already tries both us-ascii and utf-8,
                    # let's add something a bit more useful.
                    charset = 'iso-8859-1'
                charset = Charset(charset)
                codec = charset.input_codec or 'ascii'
                if not isinstance(name, unicode):
                    name = unicode(name, codec, 'replace')
                name = Header(name, charset).encode()
                msgcopy['To'] = formataddr((name, recip))
            else:
                msgcopy['To'] = recip
        # We can flag the mail as a duplicate for each member, if they've
        # already received this message, as calculated by Message-ID.  See
        # AvoidDuplicates.py for details.
        del msgcopy['x-mailman-copy']
        if recip in msgdata.get('add-dup-header', {}):
            msgcopy['X-Mailman-Copy'] = 'yes'
        # For the final delivery stage, we can just bulk deliver to a party of
        # one. ;)
        bulkdeliver(mlist, msgcopy, msgdata, recip_envsender, failures, conn)
def bulkdeliver(mlist, msg, msgdata, envsender, failures, conn):
    """Send `msg` to all of msgdata['recips'] in one SMTP transaction.

    The Sender: and Errors-To: headers of `msg` are replaced with the
    envelope sender before the message is flattened and sent.

    :param mlist: The mailing list (not used by this function).
    :param msg: The message to send; its headers are modified in place.
    :param msgdata: The message metadata dictionary; only 'recips' is read.
    :param envsender: The envelope sender address.
    :param failures: Dictionary updated in place with the refused
        recipients, mapping each address to a (code, message) tuple.
    :param conn: Object whose sendmail(envsender, recips, msgtext) method
        performs the delivery and returns the refused recipients.
    """
    # Do some final cleanup of the message header.  Start by blowing away
    # any Sender: and Errors-To: headers so remote MTAs won't be tempted to
    # deliver bounces there instead of to our envelope sender.
    #
    # BAW An interpretation of RFCs 2822 and 2076 could argue for not touching
    # the Sender header at all.  Brad Knowles points out that MTAs tend to
    # wipe existing Return-Path headers, and old MTAs may still honor
    # Errors-To while new ones will at worst ignore the header.
    for stale_header in ('sender', 'errors-to'):
        del msg[stale_header]
    msg['Sender'] = envsender
    msg['Errors-To'] = envsender
    # Get the plain, flattened text of the message, sans unixfrom.
    msgtext = msg.as_string()
    rejected = {}
    recips = msgdata['recips']
    msgid = msg['message-id']
    try:
        # Send the message.
        rejected = conn.sendmail(envsender, recips, msgtext)
    except smtplib.SMTPRecipientsRefused as error:
        log.error('%s recipients refused: %s', msgid, error)
        rejected = error.recipients
    except smtplib.SMTPResponseException as error:
        log.error('%s SMTP session failure: %s, %s',
                  msgid, error.smtp_code, error.smtp_error)
        # If this was a permanent failure, don't add the recipients to the
        # rejected ones, because we don't want them to be added to failures.
        # Otherwise, if the MTA rejects the message because of the message
        # content (e.g. it's spam, a virus, or has syntactic problems), then
        # this will end up registering a bounce score for every recipient.
        # Definitely /not/ what we want.
        permanent = error.smtp_code >= 500 and error.smtp_code != 552
        if not permanent:
            # It's a temporary failure.
            outcome = (error.smtp_code, error.smtp_error)
            rejected.update((recip, outcome) for recip in recips)
    except (socket.error, IOError, smtplib.SMTPException) as error:
        # MTA not responding, or other socket problems, or any other kind of
        # SMTPException.  In that case, nothing got delivered, so treat this
        # as a temporary failure.
        log.error('%s low level smtp error: %s', msgid, error)
        outcome = (-1, str(error))
        rejected.update((recip, outcome) for recip in recips)
    failures.update(rejected)
class SMTPDirect:
    """SMTP delivery.

    Handler wrapper that exposes this module's process() function through
    the `IHandler` interface.
    """

    implements(IHandler)

    name = 'smtp-direct'
    description = _('SMTP delivery.')

    def process(self, mlist, msg, msgdata):
        """See `IHandler`.

        Delegates to the module-level process() function.
        """
        process(mlist, msg, msgdata)