Give the SubscriptionWorkflow a member attribute which gets set to the member
[mailman.git] / port_me / gate_news.py
blob72568cd1b12f6bf34e28398377bd4848a41ad995
1 # Copyright (C) 1998-2015 by the Free Software Foundation, Inc.
3 # This file is part of GNU Mailman.
5 # GNU Mailman is free software: you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option)
8 # any later version.
10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 # more details.
15 # You should have received a copy of the GNU General Public License along with
16 # GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
18 import os
19 import sys
20 import time
21 import socket
22 import logging
23 import nntplib
24 import optparse
25 import email.Errors
27 from email.Parser import Parser
28 from flufl.lock import Lock, TimeOutError
29 from lazr.config import as_host_port
31 from mailman import MailList
32 from mailman import Message
33 from mailman import loginit
34 from mailman.configuration import config
35 from mailman.core.i18n import _
36 from mailman.core.switchboard import Switchboard
37 from mailman.version import MAILMAN_VERSION
# Work around known problems with some RedHat cron daemons by putting the
# SIGCHLD handler back to the default disposition.
import signal
signal.signal(signal.SIGCHLD, signal.SIG_DFL)

# Separator used when joining header and body lines into one message text.
NL = '\n'

# Module-level logger placeholder.  The functions below call methods on
# `log`, so it has to refer to a real logger object before any of them run.
log = None
47 class _ContinueLoop(Exception):
48 pass
def parseargs():
    """Parse the command line of the gate_news script.

    Only options are accepted.  If any positional argument is present the
    help text is printed, a complaint is written to stderr and the process
    exits with status 1.

    :return: A 3-tuple of (parsed options, positional arguments, parser).
    """
    usage_text = _("""\
%prog [options]

Poll the NNTP servers for messages to be gatewayed to mailing lists.""")
    parser = optparse.OptionParser(version=MAILMAN_VERSION, usage=usage_text)
    parser.add_option(
        '-C', '--config',
        help=_('Alternative configuration file to use'))
    opts, args = parser.parse_args()
    if len(args) > 0:
        # Positional arguments are a usage error for this script.
        parser.print_help()
        print >> sys.stderr, _('Unexpected arguments')
        sys.exit(1)
    return opts, args, parser
# Open NNTP connections, keyed by the list's `nntp_host` setting.
_hostcache = {}


def open_newsgroup(mlist):
    """Select the list's linked newsgroup on a (possibly cached) connection.

    A "mode reader" connection is opened the first time a given `nntp_host`
    value is seen and is then reused for every other gated list that has
    the same `nntp_host`.

    :param mlist: The mailing list; its `nntp_host` and `linked_newsgroup`
        attributes are read.
    :return: A 3-tuple of (connection, first article number, last article
        number), with both article numbers converted to ints.
    :raises socket.error, nntplib.NNTPError, IOError: If a new connection
        cannot be opened.  The failure is logged and then re-raised.
    """
    cache_key = mlist.nntp_host
    # Split host:port if given; 119 is used when no port is present.
    host, port = as_host_port(cache_key, default_port=119)
    connection = _hostcache.get(cache_key)
    if connection is None:
        try:
            connection = nntplib.NNTP(
                host, port,
                readermode=True,
                user=config.NNTP_USERNAME,
                password=config.NNTP_PASSWORD)
        except (socket.error, nntplib.NNTPError, IOError) as error:
            log.error('error opening connection to nntp_host: %s\n%s',
                      cache_key, error)
            raise
        _hostcache[cache_key] = connection
    # The GROUP reply has five fields, but only the first and the last
    # article numbers are of interest here.
    group_reply = connection.group(mlist.linked_newsgroup)
    _response, _count, first, last, _name = group_reply
    return connection, int(first), int(last)
def clearcache():
    """Quit every cached NNTP connection, then empty the host cache."""
    # set() collapses duplicates so each connection object is quit once.
    open_connections = set(_hostcache.values())
    for connection in open_connections:
        connection.quit()
    _hostcache.clear()
def _scan_headers(headers, posting_address):
    """Look for a To: header and a matching X-BeenThere: header.

    :param headers: The raw header lines of one article.
    :param posting_address: The posting address of the mailing list.
    :return: A 2-tuple (found_to, beenthere).  `beenthere` is True when an
        X-BeenThere header whose value is exactly `posting_address` was
        seen; scanning stops at that header, so `found_to` only reflects
        the lines before it.
    """
    found_to = False
    # FIXME 2010-02-16 barry use List-Post header.
    marker = ': %s' % posting_address
    for header in headers:
        i = header.find(':')
        if i <= 0:
            # No field name on this line, so it can match neither header.
            continue
        name = header[:i].lower()
        if name == 'to':
            found_to = True
        elif name == 'x-beenthere' and header[i:] == marker:
            return found_to, True
    return found_to, False


def _parse_article(mlist, num, headers, body):
    """Build a message object from an article's header and body lines.

    :return: The parsed message, or None when the email package rejects
        the text (the error is logged).
    """
    # Usenet originated messages will not have a Unix envelope (i.e. "From "
    # header).  This breaks Pipermail archiving, so we will synthesize one.
    # Be sure to use the format searched for by
    # mailbox.UnixMailbox._isrealfromline().  BAW: We use the -bounces
    # address here in case any downstream clients use the envelope sender
    # for bounces; I'm not sure about this, but it's the closest to the old
    # semantics.
    lines = ['From %s %s' % (mlist.GetBouncesEmail(),
                             time.ctime(time.time()))]
    lines.extend(headers)
    lines.append('')
    lines.extend(body)
    lines.append('')
    parser = Parser(Message.Message)
    try:
        return parser.parsestr(NL.join(lines))
    except email.Errors.MessageError as error:
        log.error('email package exception for %s:%d\n%s',
                  mlist.linked_newsgroup, num, error)
        return None


def poll_newsgroup(mlist, conn, first, last, glock):
    """Gate articles `first` .. `last` - 1 from the newsgroup to the list.

    This function requires the list to be locked.

    Every article that does not already carry an X-BeenThere header for
    this list is turned into a message, addressed to the list's posting
    address and put on the incoming queue.  The list's `usenet_watermark`
    is advanced for every article handled, including articles that were
    skipped as already seen and articles that caused an NNTP error.  It is
    not advanced for an article that could not be parsed.

    :param mlist: The (locked) mailing list.
    :param conn: The NNTP connection to read articles from.
    :param first: Number of the first article to fetch.
    :param last: One past the number of the last article to fetch.
    :param glock: The script-wide lock; refreshed once per article.
    """
    listname = mlist.internal_name()
    # NEWNEWS is not portable and has synchronization issues.
    for num in range(first, last):
        glock.refresh()
        try:
            headers = conn.head(repr(num))[3]
            found_to, beenthere = _scan_headers(
                headers, mlist.posting_address)
            if not beenthere:
                body = conn.body(repr(num))[3]
                msg = _parse_article(mlist, num, headers, body)
                if msg is None:
                    # Unparseable article: move on without touching the
                    # watermark for it.
                    continue
                if found_to:
                    del msg['X-Originally-To']
                    msg['X-Originally-To'] = msg['To']
                    del msg['To']
                msg['To'] = mlist.posting_address
                # Post the message to the locked list
                inq = Switchboard(config.INQUEUE_DIR)
                inq.enqueue(msg,
                            listid=mlist.list_id,
                            fromusenet=True)
                log.info('posted to list %s: %7d', listname, num)
        except nntplib.NNTPError:
            log.exception('NNTP error for list %s: %7d', listname, num)
        # Even if we don't post the message because it was seen on the
        # list already, update the watermark
        mlist.usenet_watermark = num
def process_lists(glock):
    """Gate news to mail for every list that has `gateway_to_mail` set.

    For each such list the linked newsgroup is opened.  A list without a
    watermark is only caught up to the newsgroup's last article; any other
    list has the articles after its watermark gated by poll_newsgroup().
    The list is locked only while it is being modified, and is saved and
    unlocked afterwards.

    :param glock: The script-wide lock.  It is refreshed once per list and
        handed on to poll_newsgroup().
    """
    for listname in config.list_manager.names:
        glock.refresh()
        # Open the list unlocked just to check to see if it is gating news to
        # mail.  If not, we're done with the list.  Otherwise, lock the list
        # and gate the group.
        mlist = MailList.MailList(listname, lock=False)
        if not mlist.gateway_to_mail:
            continue
        # Get the list's watermark, i.e. the last article number that we gated
        # from news to mail.  None means that this list has never polled its
        # newsgroup and that we should do a catch up.
        watermark = getattr(mlist, 'usenet_watermark', None)
        # Open the newsgroup, but let most exceptions percolate up.
        try:
            conn, first, last = open_newsgroup(mlist)
        except (socket.error, nntplib.NNTPError):
            # NOTE(review): this abandons all remaining lists, including
            # ones on other NNTP hosts -- confirm `continue` is not wanted.
            break
        log.info('%s: [%d..%d]', listname, first, last)
        try:
            if watermark is None:
                mlist.Lock(timeout=config.LIST_LOCK_TIMEOUT)
                # This is the first time we've tried to gate this
                # newsgroup.  We essentially do a mass catch-up, otherwise
                # we'd flood the mailing list.
                mlist.usenet_watermark = last
                log.info('%s caught up to article %d', listname, last)
            else:
                # The list has been polled previously, so now we simply
                # grab all the messages on the newsgroup that have not
                # been seen by the mailing list.  The first such article
                # is the maximum of the lowest article available in the
                # newsgroup and the watermark.  It's possible that some
                # articles have been expired since the last time gate_news
                # has run.  Not much we can do about that.
                start = max(watermark + 1, first)
                if start > last:
                    log.info('nothing new for list %s', listname)
                else:
                    mlist.Lock(timeout=config.LIST_LOCK_TIMEOUT)
                    log.info('gating %s articles [%d..%d]',
                             listname, start, last)
                    # Use last+1 because poll_newsgroup() employs a for
                    # loop over range, and this will not include the last
                    # element in the list.
                    poll_newsgroup(mlist, conn, start, last + 1, glock)
        except TimeOutError:
            log.error('Could not acquire list lock: %s', listname)
        finally:
            # NOTE(review): the indentation of this copy was lost; Save() is
            # taken to be guarded by Locked() and Unlock() to run always --
            # confirm against the canonical source.
            if mlist.Locked():
                mlist.Save()
            mlist.Unlock()
        # The attribute does not exist yet if a never-polled list could not
        # be locked, so read it defensively and format it with %s.
        log.info('%s watermark: %s', listname,
                 getattr(mlist, 'usenet_watermark', None))
def main():
    """Entry point: load the configuration and gate all lists under a lock.

    The module-level `log` is bound here, because every other function in
    this file logs through that global.  If the gate_news lock cannot be
    acquired, an error is logged and nothing is gated.
    """
    # Without this declaration the assignment below would only create a
    # local name and the module-level `log` would stay None.
    global log

    opts, args, parser = parseargs()
    config.load(opts.config)

    lock_file = os.path.join(config.LOCK_DIR, 'gate_news.lock')
    lock_lifetime = config.hours(2)

    loginit.initialize(propagate=True)
    log = logging.getLogger('mailman.fromusenet')

    try:
        with Lock(lock_file,
                  # It's okay to hijack this
                  lifetime=lock_lifetime) as lock:
            process_lists(lock)
            clearcache()
    except TimeOutError:
        log.error('Could not acquire gate_news lock')


if __name__ == '__main__':
    main()