src/mailman/runners/digest.py

   1 # Copyright (C) 2009-2016 by the Free Software Foundation, Inc.
   2 #
   3 # This file is part of GNU Mailman.
   4 #
   5 # GNU Mailman is free software: you can redistribute it and/or modify it under
   6 # the terms of the GNU General Public License as published by the Free
   7 # Software Foundation, either version 3 of the License, or (at your option)
   8 # any later version.
   9 #
  10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
  11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 # more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along with
  16 # GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 """Digest runner."""
  19
  20 __all__ = [
  21     'DigestRunner',
  22     ]
  23
  24
  25 import re
  26 import logging
  27
  28 from copy import deepcopy
  29 from email.header import Header
  30 from email.mime.message import MIMEMessage
  31 from email.mime.text import MIMEText
  32 from email.utils import formatdate, getaddresses, make_msgid
  33 from io import StringIO
  34 from mailman.config import config
  35 from mailman.core.i18n import _
  36 from mailman.core.runner import Runner
  37 from mailman.email.message import Message, MultipartDigestMessage
  38 from mailman.handlers.decorate import decorate
  39 from mailman.interfaces.member import DeliveryMode, DeliveryStatus
  40 from mailman.utilities.i18n import make
  41 from mailman.utilities.mailbox import Mailbox
  42 from mailman.utilities.string import oneline, wrap
  43 from urllib.error import URLError
  44
  45
  46 log = logging.getLogger('mailman.error')
  47
  48
  49 \f
  50 class Digester:
  51     """Base digester class."""
  52
  53     def __init__(self, mlist, volume, digest_number):
  54         self._mlist = mlist
  55         self._charset = mlist.preferred_language.charset
  56         # This will be used in the Subject, so use $-strings.
  57         self._digest_id = _(
  58             '$mlist.display_name Digest, Vol $volume, Issue $digest_number')
  59         self._subject = Header(self._digest_id,
  60                                self._charset,
  61                                header_name='Subject')
  62         self._message = self._make_message()
  63         self._message['From'] = mlist.request_address
  64         self._message['Subject'] = self._subject
  65         self._message['To'] = mlist.posting_address
  66         self._message['Reply-To'] = mlist.posting_address
  67         self._message['Date'] = formatdate(localtime=True)
  68         self._message['Message-ID'] = make_msgid()
  69         # In the rfc1153 digest, the masthead contains the digest boilerplate
  70         # plus any digest header.  In the MIME digests, the masthead and
  71         # digest header are separate MIME subobjects.  In either case, it's
  72         # the first thing in the digest, and we can calculate it now, so go
  73         # ahead and add it now.
  74         self._masthead = make('masthead.txt',
  75                               mailing_list=mlist,
  76                               display_name=mlist.display_name,
  77                               got_list_email=mlist.posting_address,
  78                               got_listinfo_url=mlist.script_url('listinfo'),
  79                               got_request_email=mlist.request_address,
  80                               got_owner_email=mlist.owner_address,
  81                               )
  82         # Set things up for the table of contents.
  83         if mlist.digest_header_uri is not None:
  84             try:
  85                 self._header = decorate(mlist, mlist.digest_header_uri)
  86             except URLError:
  87                 log.exception(
  88                     'Digest header decorator URI not found ({0}): {1}'.format(
  89                         mlist.fqdn_listname, mlist.digest_header_uri))
  90                 self._header = ''
  91         self._toc = StringIO()
  92         print(_("Today's Topics:\n"), file=self._toc)
  93
  94     def add_to_toc(self, msg, count):
  95         """Add a message to the table of contents."""
  96         subject = msg.get('subject', _('(no subject)'))
  97         subject = oneline(subject, in_unicode=True)
  98         # Don't include the redundant subject prefix in the toc
  99         mo = re.match('(re:? *)?({0})'.format(
 100             re.escape(self._mlist.subject_prefix)),
 101                       subject, re.IGNORECASE)
 102         if mo:
 103             subject = subject[:mo.start(2)] + subject[mo.end(2):]
 104         # Take only the first author we find.
 105         username = ''
 106         addresses = getaddresses(
 107             [oneline(msg.get('from', ''), in_unicode=True)])
 108         if addresses:
 109             username = addresses[0][0]
 110             if not username:
 111                 username = addresses[0][1]
 112         if username:
 113             username = ' ({0})'.format(username)
 114         lines = wrap('{0:2}. {1}'. format(count, subject), 65).split('\n')
 115         # See if the user's name can fit on the last line
 116         if len(lines[-1]) + len(username) > 70:
 117             lines.append(username)
 118         else:
 119             lines[-1] += username
 120         # Add this subject to the accumulating topics
 121         first = True
 122         for line in lines:
 123             if first:
 124                 print(' ', line, file=self._toc)
 125                 first = False
 126             else:
 127                 print('     ', line.lstrip(), file=self._toc)
 128
 129     def add_message(self, msg, count):
 130         """Add the message to the digest."""
 131         # We do not want all the headers of the original message to leak
 132         # through in the digest messages.
 133         keepers = {}
 134         for header in self._keepers:
 135             keepers[header] = msg.get_all(header, [])
 136         # Remove all the unkempt <wink> headers.  Use .keys() to allow for
 137         # destructive iteration...
 138         for header in msg.keys():
 139             del msg[header]
 140         # ... and add them in the designated order.
 141         for header in self._keepers:
 142             for value in keepers[header]:
 143                 msg[header] = value
 144         # Add some useful extra stuff.
 145         msg['Message'] = count.decode('utf-8')
 146
 147
 148
 149 \f
 150 class MIMEDigester(Digester):
 151     """A MIME digester."""
 152
 153     def __init__(self, mlist, volume, digest_number):
 154         super().__init__(mlist, volume, digest_number)
 155         masthead = MIMEText(self._masthead.encode(self._charset),
 156                             _charset=self._charset)
 157         masthead['Content-Description'] = self._subject
 158         self._message.attach(masthead)
 159         # Add the optional digest header.
 160         if mlist.digest_header_uri is not None:
 161             header = MIMEText(self._header.encode(self._charset),
 162                               _charset=self._charset)
 163             header['Content-Description'] = _('Digest Header')
 164             self._message.attach(header)
 165         # Calculate the set of headers we're to keep in the MIME digest.
 166         self._keepers = set(config.digests.mime_digest_keep_headers.split())
 167
 168     def _make_message(self):
 169         return MultipartDigestMessage('mixed')
 170
 171     def add_toc(self, count):
 172         """Add the table of contents."""
 173         toc_text = self._toc.getvalue()
 174         try:
 175             toc_part = MIMEText(toc_text.encode(self._charset),
 176                                 _charset=self._charset)
 177         except UnicodeError:
 178             toc_part = MIMEText(toc_text.encode('utf-8'), _charset='utf-8')
 179         toc_part['Content-Description']= _("Today's Topics ($count messages)")
 180         self._message.attach(toc_part)
 181
 182     def add_message(self, msg, count):
 183         """Add the message to the digest."""
 184         # Make a copy of the message object, since the RFC 1153 processing
 185         # scrubs out attachments.
 186         self._message.attach(MIMEMessage(deepcopy(msg)))
 187
 188     def finish(self):
 189         """Finish up the digest, producing the email-ready copy."""
 190         if self._mlist.digest_footer_uri is not None:
 191             try:
 192                 footer_text = decorate(
 193                     self._mlist, self._mlist.digest_footer_uri)
 194             except URLError:
 195                 log.exception(
 196                     'Digest footer decorator URI not found ({0}): {1}'.format(
 197                         self._mlist.fqdn_listname,
 198                         self._mlist.digest_footer_uri))
 199                 footer_text = ''
 200             footer = MIMEText(footer_text.encode(self._charset),
 201                               _charset=self._charset)
 202             footer['Content-Description'] = _('Digest Footer')
 203             self._message.attach(footer)
 204         # This stuff is outside the normal MIME goo, and it's what the old
 205         # MIME digester did.  No one seemed to complain, probably because you
 206         # won't see it in an MUA that can't display the raw message.  We've
 207         # never got complaints before, but if we do, just wax this.  It's
 208         # primarily included for (marginally useful) backwards compatibility.
 209         self._message.postamble = _('End of ') + self._digest_id
 210         return self._message
 211
 212
 213 \f
 214 class RFC1153Digester(Digester):
 215     """A digester of the format specified by RFC 1153."""
 216
 217     def __init__(self, mlist, volume, digest_number):
 218         super().__init__(mlist, volume, digest_number)
 219         self._separator70 = '-' * 70
 220         self._separator30 = '-' * 30
 221         self._text = StringIO()
 222         print(self._masthead, file=self._text)
 223         print(file=self._text)
 224         # Add the optional digest header.
 225         if mlist.digest_header_uri is not None:
 226             print(self._header, file=self._text)
 227             print(file=self._text)
 228         # Calculate the set of headers we're to keep in the RFC1153 digest.
 229         self._keepers = set(config.digests.plain_digest_keep_headers.split())
 230
 231     def _make_message(self):
 232         return Message()
 233
 234     def add_toc(self, count):
 235         """Add the table of contents."""
 236         print(self._toc.getvalue(), file=self._text)
 237         print(file=self._text)
 238         print(self._separator70, file=self._text)
 239         print(file=self._text)
 240
 241     def add_message(self, msg, count):
 242         """Add the message to the digest."""
 243         if count > 1:
 244             print(self._separator30, file=self._text)
 245             print(file=self._text)
 246         # Each message section contains a few headers.
 247         for header in config.digests.plain_digest_keep_headers.split():
 248             if header in msg:
 249                 value = oneline(msg[header], in_unicode=True)
 250                 value = wrap('{0}: {1}'.format(header, value))
 251                 value = '\n\t'.join(value.split('\n'))
 252                 print(value, file=self._text)
 253         print(file=self._text)
 254         # Add the payload.  If the decoded payload is empty, this may be a
 255         # multipart message.  In that case, just stringify it.
 256         payload = msg.get_payload(decode=True)
 257         if not payload:
 258             payload = msg.as_string().split('\n\n', 1)[1]
 259         if isinstance(payload, bytes):
 260             try:
 261                 # Do the decoding inside the try/except so that if the charset
 262                 # conversion fails, we'll just drop back to ascii.
 263                 charset = msg.get_content_charset('us-ascii')
 264                 payload = payload.decode(charset, 'replace')
 265             except (LookupError, TypeError):
 266                 # Unknown or empty charset.
 267                 payload = payload.decode('us-ascii', 'replace')
 268         print(payload, file=self._text)
 269         if not payload.endswith('\n'):
 270             print(file=self._text)
 271
 272     def finish(self):
 273         """Finish up the digest, producing the email-ready copy."""
 274         if self._mlist.digest_footer_uri is not None:
 275             try:
 276                 footer_text = decorate(
 277                     self._mlist, self._mlist.digest_footer_uri)
 278             except URLError:
 279                 log.exception(
 280                     'Digest footer decorator URI not found ({0}): {1}'.format(
 281                         self._mlist.fqdn_listname,
 282                         self._mlist.digest_footer_uri))
 283                 footer_text = ''
 284             # MAS: There is no real place for the digest_footer in an RFC 1153
 285             # compliant digest, so add it as an additional message with
 286             # Subject: Digest Footer
 287             print(self._separator30, file=self._text)
 288             print(file=self._text)
 289             print('Subject: ' + _('Digest Footer'), file=self._text)
 290             print(file=self._text)
 291             print(footer_text, file=self._text)
 292             print(file=self._text)
 293             print(self._separator30, file=self._text)
 294             print(file=self._text)
 295         # Add the sign-off.
 296         sign_off = _('End of ') + self._digest_id
 297         print(sign_off, file=self._text)
 298         print('*' * len(sign_off), file=self._text)
 299         # If the digest message can't be encoded by the list character set,
 300         # fall back to utf-8.
 301         text = self._text.getvalue()
 302         try:
 303             self._message.set_payload(text.encode(self._charset),
 304                                       charset=self._charset)
 305         except UnicodeError:
 306             self._message.set_payload(text.encode('utf-8'), charset='utf-8')
 307         return self._message
 308
 309
 310 \f
 311 class DigestRunner(Runner):
 312     """The digest runner."""
 313
 314     def _dispose(self, mlist, msg, msgdata):
 315         """See `IRunner`."""
 316         volume = msgdata['volume']
 317         digest_number = msgdata['digest_number']
 318         # Backslashes make me cry.
 319         code = mlist.preferred_language.code
 320         with Mailbox(msgdata['digest_path']) as mailbox, _.using(code):
 321             # Create the digesters.
 322             mime_digest = MIMEDigester(mlist, volume, digest_number)
 323             rfc1153_digest = RFC1153Digester(mlist, volume, digest_number)
 324             # Cruise through all the messages in the mailbox, first building
 325             # the table of contents and accumulating Subject: headers and
 326             # authors.  The question really is whether it's better from a1
 327             # performance and memory footprint to go through the mailbox once
 328             # and cache the messages in a list, or to cruise through the
 329             # mailbox twice.  We'll do the latter, but it's a complete guess.
 330             count = None
 331             for count, (key, message) in enumerate(mailbox.iteritems(), 1):
 332                 mime_digest.add_to_toc(message, count)
 333                 rfc1153_digest.add_to_toc(message, count)
 334             assert count is not None, 'No digest messages?'
 335             # Add the table of contents.
 336             mime_digest.add_toc(count)
 337             rfc1153_digest.add_toc(count)
 338             # Cruise through the set of messages a second time, adding them to
 339             # the actual digest.
 340             for count, (key, message) in enumerate(mailbox.iteritems(), 1):
 341                 mime_digest.add_message(message, count)
 342                 rfc1153_digest.add_message(message, count)
 343             # Finish up the digests.
 344             mime = mime_digest.finish()
 345             rfc1153 = rfc1153_digest.finish()
 346         # Calculate the recipients lists
 347         mime_recipients = set()
 348         rfc1153_recipients = set()
 349         # When someone turns off digest delivery, they will get one last
 350         # digest to ensure that there will be no gaps in the messages they
 351         # receive.
 352         digest_members = set(mlist.digest_members.members)
 353         for member in digest_members:
 354             if member.delivery_status is not DeliveryStatus.enabled:
 355                 continue
 356             # Send the digest to the case-preserved address of the digest
 357             # members.
 358             email_address = member.address.original_email
 359             if member.delivery_mode == DeliveryMode.plaintext_digests:
 360                 rfc1153_recipients.add(email_address)
 361             # We currently treat summary_digests the same as mime_digests.
 362             elif member.delivery_mode in (DeliveryMode.mime_digests,
 363                                           DeliveryMode.summary_digests):
 364                 mime_recipients.add(email_address)
 365             else:
 366                 raise AssertionError(
 367                     'Digest member "{0}" unexpected delivery mode: {1}'.format(
 368                         email_address, member.delivery_mode))
 369         # Add also the folks who are receiving one last digest.
 370         for address, delivery_mode in mlist.last_digest_recipients:
 371             if delivery_mode == DeliveryMode.plaintext_digests:
 372                 rfc1153_recipients.add(address.original_email)
 373             # We currently treat summary_digests the same as mime_digests.
 374             elif delivery_mode in (DeliveryMode.mime_digests,
 375                                    DeliveryMode.summary_digests):
 376                 mime_recipients.add(address.original_email)
 377             else:
 378                 raise AssertionError(
 379                     'OLD recipient "{0}" unexpected delivery mode: {1}'.format(
 380                         address, delivery_mode))
 381         # Send the digests to the virgin queue for final delivery.
 382         queue = config.switchboards['virgin']
 383         if len(mime_recipients) > 0:
 384             queue.enqueue(mime,
 385                           recipients=mime_recipients,
 386                           listid=mlist.list_id,
 387                           isdigest=True)
 388         if len(rfc1153_recipients) > 0:
 389             queue.enqueue(rfc1153,
 390                           recipients=rfc1153_recipients,
 391                           listid=mlist.list_id,
 392                           isdigest=True)