1 # Copyright (C) 2002-2023 by the Free Software Foundation, Inc.
3 # This file is part of GNU Mailman.
5 # GNU Mailman is free software: you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option)
10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 # You should have received a copy of the GNU General Public License along with
16 # GNU Mailman. If not, see <https://www.gnu.org/licenses/>.
"""MIME-stripping filter for Mailman.

This module scans a message for MIME content, removing those sections whose
MIME types match one of a list of matches.  multipart/alternative sections are
replaced by the first non-empty component, and multipart/mixed sections
wrapping only single sections after other processing are replaced by their
contents.
"""
33 from contextlib
import ExitStack
, suppress
34 from email
.iterators
import typed_subpart_iterator
35 from email
.mime
.message
import MIMEMessage
36 from email
.mime
.text
import MIMEText
37 from itertools
import count
38 from lazr
.config
import as_boolean
39 from mailman
.config
import config
40 from mailman
.core
.i18n
import _
41 from mailman
.email
.message
import OwnerNotification
42 from mailman
.interfaces
.action
import FilterAction
43 from mailman
.interfaces
.handler
import IHandler
44 from mailman
.interfaces
.pipeline
import DiscardMessage
, RejectMessage
45 from mailman
.utilities
.string
import oneline
46 from mailman
.version
import VERSION
47 from public
import public
48 from string
import Template
49 from subprocess
import CalledProcessError
, check_output
50 from zope
.interface
import implementer
# Module-level logger; everything in this file logs to 'mailman.error'.
log = logging.getLogger('mailman.error')
def dispose(mlist, msg, msgdata, why):
    """Get rid of a message that was caught by the content filter.

    The list's ``filter_action`` selects what happens before the message
    is thrown away:

    * ``reject`` -- raise `RejectMessage` so the author is told ``why``.
    * ``forward`` -- send the list administrators a notice with the
      original message attached.
    * ``preserve`` -- enqueue a copy in the 'bad' switchboard, but only if
      ``config.mailman.filtered_messages_are_preservable`` is true.
    * ``discard`` -- nothing extra.
    * anything else -- log an error and treat it as ``discard``.

    ``forward`` and ``preserve`` (and the error log) are skipped when
    ``msgdata['fwd_preserve']`` is present and false.

    :param mlist: The mailing list whose filter matched.
    :param msg: The message to dispose of.
    :param msgdata: The message metadata dictionary.
    :param why: Human readable reason, passed on to the raised exception.
    :raises RejectMessage: when the filter action is ``reject``.
    :raises DiscardMessage: in every other case; this function never
        returns normally.
    """
    action = mlist.filter_action
    if action is FilterAction.reject:
        # Bounce the message to the original author.
        raise RejectMessage(why)
    fwd_preserve = msgdata.get('fwd_preserve', True)
    if action is FilterAction.forward and fwd_preserve:
        # Forward it on to the list moderators.  The ${mlist...}
        # placeholder is filled in by _(), presumably from this function's
        # local variables, so the parameter must keep the name `mlist`.
        # NOTE(review): the whitespace inside this msgid was collapsed in
        # the copy under review; confirm it against the message catalog.
        text = _("""\
The attached message matched the ${mlist.display_name} mailing list's content
filtering rules and was prevented from being forwarded on to the list
membership.  You are receiving the only remaining copy of the discarded
message.

""")
        subject = _('Content filter message notification')
        notice = OwnerNotification(
            mlist, subject, roster=mlist.administrators)
        notice.set_type('multipart/mixed')
        notice.attach(MIMEText(text))
        notice.attach(MIMEMessage(msg))
        notice.send(mlist)
        # Let this fall through so the original message gets discarded.
    elif action is FilterAction.preserve and fwd_preserve:
        if as_boolean(config.mailman.filtered_messages_are_preservable):
            # This is just like discarding the message except that a copy
            # is placed in the 'bad' queue should the site administrator
            # want to inspect the message.
            filebase = config.switchboards['bad'].enqueue(msg, msgdata)
            log.info('{} preserved in file base {}'.format(
                msg.get('message-id', 'n/a'), filebase))
    elif action is FilterAction.discard:
        # Nothing to do besides the discard below.
        pass
    elif fwd_preserve:
        # NOTE(review): confirm the spacing after the full stop.
        log.error(
            '{} invalid FilterAction: {}.  Treating as discard'.format(
                mlist.fqdn_listname, mlist.filter_action.name))
    # Most cases also discard the message
    raise DiscardMessage(why)
def process(mlist, msg, msgdata):
    """Apply the list's MIME content filter to ``msg``, in place.

    The steps are, in order: check the outer content type and file
    extension against the list's filter/pass sets; recursively remove
    matching subparts; optionally collapse multipart/alternative parts;
    recast single-part multiparts; optionally convert text/html parts to
    text/plain; and finally, when parts were removed and the site enables
    ``filter_report``, add a report of what was removed.

    The module globals ``report`` and ``attach_report`` are reset here and
    updated by the helper functions while the message is processed.

    :param mlist: The mailing list; supplies the filter settings.
    :param msg: The message to filter.  The same instance is modified.
    :param msgdata: The message metadata dictionary.
    :raises RejectMessage, DiscardMessage: via `dispose()` when the whole
        message is filtered out.
    """
    global attach_report, report
    report = _("""
___________________________________________
Mailman's content filtering has removed the
following MIME parts from this message.
""")
    attach_report = False
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check to see if the outer type matches one of the filter types
    filtertypes = set(mlist.filter_types)
    passtypes = set(mlist.pass_types)
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    # Check to see if there is a pass types and the outer type doesn't match
    # one of these types
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Filter by file extensions
    filterexts = set(mlist.filter_extensions)
    passexts = set(mlist.pass_extensions)
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(
                mlist, msg, msgdata,
                _("The message's file extension was explicitly disallowed"))
        if passexts and fext not in passexts:
            dispose(
                mlist, msg, msgdata,
                _("The message's file extension was not explicitly allowed"))
    # Remember how many parts there are so removals can be detected later.
    numparts = len(list(msg.walk()))
    # If the message is a multipart, filter out matching subparts
    if msg.is_multipart():
        # Recursively filter out any subparts that match the filter list
        prelen = len(msg.get_payload())
        # Keep an unfiltered copy; that is what gets forwarded or preserved
        # if filtering leaves nothing behind.
        premsg = copy.deepcopy(msg)
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        # If the outer message is now an empty multipart (and it wasn't
        # before!) then, again it gets discarded.
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, premsg, msgdata,
                    _("After content filtering, the message was empty"))
    # Now replace all multipart/alternatives with just the first non-empty
    # alternative.  BAW: We have to special case when the outer part is a
    # multipart/alternative because we need to retain most of the outer
    # part's headers.  For now we'll move the subpart's payload into the
    # outer part, and then copy over its Content-Type: and
    # Content-Transfer-Encoding: headers (any others?).
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        if ctype == 'multipart/alternative':
            firstalt = msg.get_payload(0)
            reset_payload(msg, firstalt)
            report += _("""
Replaced multipart/alternative part with first alternative.
""")
            # MAS Not setting attach_report True here will not report if
            # the only change is collapsing an outer MPA message.  On lists
            # where most people post from MUAs that compose HTML and send
            # MPA, setting this here would add this report to most
            # messages, which is why it is deliberately left unset.
    # Now that we've collapsed the MPA parts, go through the message
    # and recast any multipart parts with only one sub-part as just
    # the sub-part.
    if msg.is_multipart():
        recast_multipart(msg)
    # If we removed some parts, make note of this
    changedp = numparts != len(list(msg.walk()))
    # Now perhaps convert all text/html to text/plain.
    if mlist.convert_html_to_plaintext:
        # The conversion must run even when parts were already removed.
        changedp = to_plaintext(msg) or changedp
    # If we're left with only two parts, an empty body and one attachment,
    # recast the message to one of just that part
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel {}'.format(VERSION)
    if attach_report and as_boolean(config.mailman.filter_report):
        if not msg.is_multipart():
            # Single part message: append the report to the body text.
            pl = msg.get_payload(decode=True)
            cset = msg.get_content_charset(None) or 'us-ascii'
            # set_payload() below adds a fresh Content-Transfer-Encoding.
            del msg['content-transfer-encoding']
            new_pl = pl.decode(cset)
            if not pl.endswith(b'\n'):
                new_pl += '\n'
            new_pl += report
            msg.set_payload(new_pl, cset)
        elif msg.get_content_type() == 'multipart/mixed':
            msg.attach(MIMEText(report))
        else:
            # Some non-mixed multipart, we need to wrap it.
            # This is based on code in handlers/decorate.py
            # Because of the way Message objects are passed around to
            # process(), we need to play tricks with the outer message
            # -- i.e. the outer one must remain the same instance.
            # So we're going to create a clone of the outer message,
            # with all the header chrome intact, then delete unwanted
            # headers.
            inner = copy.deepcopy(msg)
            # Which headers to keep?  Let's just do the Content-* headers.
            # keys() returns a new list, so deleting while looping is safe.
            for header in inner.keys():
                if not header.lower().startswith('content-'):
                    del inner[header]
            # Now, play games with the outer message to make it contain two
            # subparts: the wrapped message, and the report.
            payload = [inner]
            payload.append(MIMEText(report))
            msg.set_payload(payload)
            del msg['content-type']
            del msg['content-transfer-encoding']
            del msg['content-disposition']
            msg['Content-Type'] = 'multipart/mixed'
def reset_payload(msg, subpart):
    """Make ``msg`` carry the content of ``subpart``, keeping its headers.

    The payload of ``msg`` is replaced by that of ``subpart`` and the
    Content-Type, Content-Disposition and Content-Description headers of
    ``msg`` are replaced by those of ``subpart``.  All other headers of
    ``msg`` are left alone.

    :param msg: The message (part) to overwrite; modified in place.
    :param subpart: The part whose content is moved up into ``msg``.
    """
    if subpart.is_multipart():
        msg.set_payload(subpart.get_payload())
    else:
        cset = subpart.get_content_charset() or 'us-ascii'
        decoded = subpart.get_payload(decode=True).decode(
            cset, errors='replace')
        msg.set_payload(decoded, charset=cset)
    # Don't restore Content-Transfer-Encoding; set_payload sets it based
    # on the charset.
    del msg['content-type']
    del msg['content-disposition']
    del msg['content-description']
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    # The remaining two headers are only copied when the subpart has them.
    for header in ('Content-Disposition', 'Content-Description'):
        value = subpart.get(header)
        if value:
            msg[header] = value
def _note_removed_part(ctype, fname):
    """Record a removed part in the report and request its attachment."""
    global attach_report, report
    report += '\nContent-Type: %s\n' % ctype
    if fname:
        # ${fname} is filled in by _(), presumably from this function's
        # local variables, so the parameter must keep the name `fname`.
        # NOTE(review): the indent string was whitespace-collapsed in the
        # copy under review; confirm its width.
        report += '    ' + _('Name: ${fname}\n')
    attach_report = True


def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively remove the subparts of ``msg`` that the filter rejects.

    A subpart is removed when its content type or main type is in
    ``filtertypes``; when ``passtypes`` is non-empty and contains neither;
    when its file extension is in ``filterexts``; when ``passexts`` is
    non-empty and does not contain its extension; or when it is a multipart
    that lost all of its own subparts.  Every removal by type or extension
    is added to the module-level ``report``.

    :param msg: The message (part) to filter; its payload is replaced.
    :param filtertypes: Content types / main types to remove.
    :param passtypes: If non-empty, the only types allowed through.
    :param filterexts: File extensions to remove.
    :param passexts: If non-empty, the only extensions allowed through.
    :return: False if ``msg`` had subparts and all of them were removed,
        True otherwise (including when ``msg`` is not a multipart).
    """
    # Look at all the message's subparts, and recursively filter
    if not msg.is_multipart():
        return True
    payload = msg.get_payload()
    kept = []
    for subpart in payload:
        if not filter_parts(subpart, filtertypes, passtypes,
                            filterexts, passexts):
            # The subpart emptied out completely; drop it silently.
            continue
        ctype = subpart.get_content_type()
        mtype = subpart.get_content_maintype()
        fname = subpart.get_filename('') or subpart.get_param('name', '')
        type_filtered = ctype in filtertypes or mtype in filtertypes
        type_not_passed = passtypes and not (
            ctype in passtypes or mtype in passtypes)
        if type_filtered or type_not_passed:
            # Throw this subpart away
            _note_removed_part(ctype, fname)
            continue
        # check file extension
        fext = get_file_ext(subpart)
        if fext and (fext in filterexts or
                     (passexts and fext not in passexts)):
            _note_removed_part(ctype, fname)
            continue
        kept.append(subpart)
    msg.set_payload(kept)
    # Check to see if we discarded all the subparts
    if len(kept) == 0 and len(payload) > 0:
        # We threw away everything
        return False
    return True
def collapse_multipart_alternatives(msg):
    """Replace nested multipart/alternative parts by their first part.

    Only the subparts of ``msg`` are examined (recursively); an outer
    multipart/alternative ``msg`` itself is left for the caller to handle.
    Each replacement is noted in the module-level ``report`` and sets
    ``attach_report``.  A multipart/alternative subpart with no parts at
    all is dropped without a note.

    :param msg: The message (part) to collapse; its payload is replaced.
    """
    global attach_report, report
    if not msg.is_multipart():
        return
    newpayload = []
    for subpart in msg.get_payload():
        if subpart.get_content_type() == 'multipart/alternative':
            # get_payload(0) raises IndexError for an empty alternative;
            # in that case nothing is appended and nothing is reported.
            with suppress(IndexError):
                firstalt = subpart.get_payload(0)
                if msg.get_content_type() == 'message/rfc822':
                    # This is a multipart/alternative message in a
                    # message/rfc822 part.  We treat it specially so as
                    # not to lose the headers.
                    reset_payload(subpart, firstalt)
                    newpayload.append(subpart)
                else:
                    newpayload.append(firstalt)
                report += _("""
Replaced multipart/alternative part with first alternative.
""")
                attach_report = True
        elif subpart.is_multipart():
            collapse_multipart_alternatives(subpart)
            newpayload.append(subpart)
        else:
            newpayload.append(subpart)
    msg.set_payload(newpayload)
def recast_multipart(msg):
    """Replace multiparts that hold a single sub-part by that sub-part.

    If we're left with a multipart message with only one sub-part, recast
    the message to just the sub-part, but not if the part is message/rfc822
    because we don't want to lose the headers.

    Also, if this is a multipart/signed part, stop now as the original part
    may have had a multipart sub-part with only one sub-sub-part, the sig
    may still be valid and going further may break it.  (LP: #1551075)

    :param msg: The message (part) to recast, in place.
    """
    if msg.get_content_type() == 'multipart/signed':
        return
    if not msg.is_multipart():
        return
    subparts = msg.get_payload()
    if len(subparts) == 1 and msg.get_content_type() != 'message/rfc822':
        reset_payload(msg, subparts[0])
        # now that we've recast this part, check the subordinate parts
        recast_multipart(msg)
    else:
        # This part's OK but check deeper.
        for part in subparts:
            recast_multipart(part)
def to_plaintext(msg):
    """Convert every text/html part of ``msg`` to text/plain, in place.

    Each HTML part is written to a temporary file and run through the
    external command configured as
    ``config.mailman.html_to_plain_text_command`` (``$filename`` in the
    template is replaced by the file's path).  When the command fails the
    error is logged and that part is left unchanged.

    :param msg: The message whose text/html parts are converted.
    :return: True if at least one part was converted, otherwise False.
    """
    changedp = False
    counter = count()
    with ExitStack() as resources:
        tempdir = tempfile.mkdtemp()
        # Remove the scratch directory however this block is left.
        resources.callback(shutil.rmtree, tempdir)
        for subpart in typed_subpart_iterator(msg, 'text', 'html'):
            filename = os.path.join(
                tempdir, '{}.html'.format(next(counter)))
            cset = subpart.get_content_charset('us-ascii')
            html = subpart.get_payload(decode=True).decode(
                cset, errors='replace')
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(html)
            template = Template(config.mailman.html_to_plain_text_command)
            # The command is split on whitespace and run without a shell.
            command = template.safe_substitute(filename=filename).split()
            try:
                stdout = check_output(command, universal_newlines=True)
            except (CalledProcessError, FileNotFoundError, PermissionError):
                log.exception('HTML -> text/plain command error')
            else:
                # Replace the payload of the subpart with the converted
                # text and tweak the content type.
                del subpart['content-transfer-encoding']
                subpart.set_payload(stdout, charset=cset)
                subpart.set_type('text/plain')
                changedp = True
    return changedp
def get_file_ext(m):
    """Return the lower-cased filename extension of ``m``, without the dot.

    Caution: some viruses don't put the filename in the
    'Content-Disposition' header, so the 'name' parameter of the
    Content-Type header is used as a fallback.

    :param m: The message (part) to inspect.
    :return: The extension, or the empty string when the part has no
        filename or the filename has no extension.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if not filename:
        return ''
    fext = os.path.splitext(oneline(filename, 'utf-8', in_unicode=True))[1]
    # splitext() gives either '' or a string starting with '.', so dropping
    # the first character yields the bare extension in both cases.
    return fext[1:].lower()
395 @implementer(IHandler
)
397 """Filter the MIME content of messages."""
400 description
= _('Filter the MIME content of messages.')
402 def process(self
, mlist
, msg
, msgdata
):
404 if not mlist
.filter_content
:
406 if msgdata
.get('isdigest'):
408 process(mlist
, msg
, msgdata
)