1 # Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
3 # This file is part of GNU Mailman.
5 # GNU Mailman is free software: you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option)
10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 # You should have received a copy of the GNU General Public License along with
16 # GNU Mailman. If not, see <http://www.gnu.org/licenses/>.
18 """Miscellaneous essential routines.
20 This includes actual message transmission routines, address checking and
21 message and address munging, a handy-dandy routine to map a function on all
22 the mailing lists, and whatever else doesn't belong elsewhere.
36 import email
.Iterators
38 from email
.Errors
import HeaderParseError
39 from string
import ascii_letters
, digits
, whitespace
, Template
41 import mailman
.templates
43 from mailman
import passwords
44 from mailman
.configuration
import config
45 from mailman
.core
import errors
# Characters that may appear in an identifier: ASCII letters, digits and
# the underscore.
IDENTCHARS = ''.join((ascii_letters, digits, '_'))
# Directory that holds the mailman.templates package, i.e. the directory
# part of that package's __file__.
TEMPLATE_DIR = os.path.split(mailman.templates.__file__)[0]
# Match %(identifier)s interpolation references.  The trailing `s' is
# optional in the pattern because leaving it off is a common mistake.
cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.I)
# Match $$, $identifier, or ${identifier}.  Each alternative has its own
# group, so exactly one of the three groups is non-empty for any match.
dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.I)

# Logger this module reports errors to.
log = logging.getLogger('mailman.error')
def list_exists(fqdn_listname):
    """Tell whether the list manager knows a list named `fqdn_listname'.

    Returns True when config.db.list_manager.get() yields anything other
    than None for the name, and False otherwise.
    """
    found = config.db.list_manager.get(fqdn_listname)
    return found is not None
72 """Return the fqdn names of all lists in default list directory."""
73 return ['%s@%s' % (listname
, hostname
)
74 for listname
, hostname
in config
.db
.list_manager
.get_list_names()]
def split_listname(listname):
    """Split a list name into its local name and host name.

    When `listname' contains the AT separator it is split once at the first
    separator and the resulting two-element list is returned.  Otherwise the
    name is treated as unqualified and paired, as a tuple, with
    config.DEFAULT_EMAIL_HOST.
    """
    # Only a qualified name can be split; without this guard the default
    # host fallback below could never be reached.
    if AT in listname:
        return listname.split(AT, 1)
    return listname, config.DEFAULT_EMAIL_HOST
def fqdn_listname(listname, hostname=None):
    """Return the fully qualified name of a list.

    When `hostname' is given, the result is `listname' and `hostname' joined
    by AT.  When it is omitted (None), `listname' is run through
    split_listname() and its two parts are joined by AT.
    """
    # Joining with a None hostname would raise, so the no-hostname case has
    # to take the split_listname() path.
    if hostname is None:
        return AT.join(split_listname(listname))
    return AT.join((listname, hostname))
90 # a much more naive implementation than say, Emacs's fill-paragraph!
91 def wrap(text
, column
=70, honor_leading_ws
=True):
92 """Wrap and fill the text to the specified column.
94 Wrapping is always in effect, although if it is not possible to wrap a
95 line (because some word is longer than `column' characters) the line is
96 broken at the next available whitespace boundary. Paragraphs are also
97 always filled, unless honor_leading_ws is true and the line begins with
98 whitespace. This is the algorithm that the Python FAQ wizard uses, and
99 seems like a good compromise.
103 # first split the text into paragraphs, defined as a blank line
104 paras
= re
.split('\n\n', text
)
109 for line
in para
.split(NL
):
113 if honor_leading_ws
and line
[0] in whitespace
:
117 if fillprev
and fillthis
:
118 # if the previous line should be filled, then just append a
119 # single space, and the rest of the current line
120 lines
[-1] = lines
[-1].rstrip() + ' ' + line
122 # no fill, i.e. retain newline
128 if len(text
) <= column
:
133 # find the last whitespace character
134 while bol
> 0 and text
[bol
] not in whitespace
:
136 # now find the last non-whitespace character
138 while eol
> 0 and text
[eol
] in whitespace
:
140 # watch out for text that's longer than the column width
142 # break on whitespace after column
144 while eol
< len(text
) and text
[eol
] not in whitespace
:
147 while bol
< len(text
) and text
[bol
] in whitespace
:
150 line
= text
[:eol
+1] + '\n'
151 # find the next non-whitespace character
153 while bol
< len(text
) and text
[bol
] in whitespace
:
160 # end for text in lines
161 # the last two newlines are bogus
166 def QuotePeriods(text
):
169 return JOINER
.join(text
.split(SEP
))
172 # This takes an email address, and returns a tuple of (user, list of '.'-separated host parts)
173 def ParseEmail(email
):
176 email
= email
.lower()
177 at_sign
= email
.find('@')
180 user
= email
[:at_sign
]
181 rest
= email
[at_sign
+1:]
182 domain
= rest
.split('.')
187 "returns the address with the domain part lowercased"
188 atind
= addr
.find('@')
189 if atind
== -1: # no domain part
191 return addr
[:atind
] + '@' + addr
[atind
+1:].lower()
194 # TBD: what other characters should be disallowed?
195 _badchars
= re
.compile(r
'[][()<>|;^,\000-\037\177-\377]')
def ValidateEmail(s):
    """Verify that an email address isn't grossly evil.

    This is a deliberately minimal check.  It raises
    errors.InvalidEmailAddress (carrying repr(s)) when the address:

      * is empty or contains a space,
      * contains a character matched by _badchars or starts with `-',
      * has no domain part, or a domain with fewer than two components.
    """
    # Pretty minimal, cheesy check.  We could do better...
    if not s or ' ' in s:
        raise errors.InvalidEmailAddress(repr(s))
    if _badchars.search(s) or s[0] == '-':
        raise errors.InvalidEmailAddress(repr(s))
    user, domain_parts = ParseEmail(s)
    # Local, unqualified addresses are not allowed.  Test for a missing
    # domain before taking its length, so that a falsy result from
    # ParseEmail() is rejected rather than crashing len().
    if not domain_parts or len(domain_parts) < 2:
        raise errors.InvalidEmailAddress(repr(s))
# Matches any character outside the printable, non-space ASCII range
# (0x21-0x7e).  Such characters could be used to build a malicious path
# that injects a new line into the mailman error log.  (TK: advisory by
# Moritz Naumann)
CRNLpat = re.compile(
    r'[^\x21-\x7e]')
217 def GetPathPieces(envar
='PATH_INFO'):
218 path
= os
.environ
.get(envar
)
220 if CRNLpat
.search(path
):
221 path
= CRNLpat
.split(path
)[0]
222 log
.error('Warning: Possible malformed path attack.')
223 return [p
for p
in path
.split('/') if p
]
def ScriptURL(target):
    """Return a relative URL to the CGI script named `target'.

    The result starts with one `../' for every path component reported by
    GetPathPieces(), followed by `target' with config.CGIEXT appended.
    """
    depth = len(GetPathPieces())
    prefix = depth * '../'
    script = target + config.CGIEXT
    return '%s%s' % (prefix, script)
234 def GetPossibleMatchingAddrs(name
):
235 """returns a sorted list of addresses that could possibly match
238 For Example, given scott@pobox.com, return ['scott@pobox.com'],
239 given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com',
240 'scott@pobox.com']"""
243 user
, domain
= ParseEmail(name
)
247 while len(domain
) >= 2:
248 res
.append("%s@%s" % (user
, DOT
.join(domain
)))
254 def List2Dict(L
, foldcase
=False):
255 """Return a dict keyed by the entries in the list passed to it."""
267 _vowels
= ('a', 'e', 'i', 'o', 'u')
268 _consonants
= ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
269 'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
273 for c
in _consonants
:
274 _syllables
.append(c
+v
)
275 _syllables
.append(v
+c
)
278 def UserFriendly_MakeRandomPassword(length
):
280 while len(syls
) * 2 < length
:
281 syls
.append(random
.choice(_syllables
))
282 return EMPTYSTRING
.join(syls
)[:length
]
285 def Secure_MakeRandomPassword(length
):
290 while bytesread
< length
:
292 # Python 2.4 has this on available systems.
293 newbytes
= os
.urandom(length
- bytesread
)
294 except (AttributeError, NotImplementedError):
297 fd
= os
.open('/dev/urandom', os
.O_RDONLY
)
299 if e
.errno
<> errno
.ENOENT
:
301 # We have no available source of cryptographically
302 # secure random characters. Log an error and fallback
303 # to the user friendly passwords.
304 log
.error('urandom not available, passwords not secure')
305 return UserFriendly_MakeRandomPassword(length
)
306 newbytes
= os
.read(fd
, length
- bytesread
)
307 bytes
.append(newbytes
)
308 bytesread
+= len(newbytes
)
309 s
= base64
.encodestring(EMPTYSTRING
.join(bytes
))
310 # base64 will expand the string by 4/3rds
311 return s
.replace('\n', '')[:length
]
def MakeRandomPassword(length=None):
    """Generate a random password and return it decoded from ASCII.

    `length' is the number of characters wanted; when omitted (None),
    config.MEMBER_PASSWORD_LENGTH is used.  If config.USER_FRIENDLY_PASSWORDS
    is true the password comes from UserFriendly_MakeRandomPassword(),
    otherwise from Secure_MakeRandomPassword().
    """
    # Fall back to the configured length only when the caller gave none.
    if length is None:
        length = config.MEMBER_PASSWORD_LENGTH
    if config.USER_FRIENDLY_PASSWORDS:
        password = UserFriendly_MakeRandomPassword(length)
    else:
        password = Secure_MakeRandomPassword(length)
    return password.decode('ascii')
328 chr1
= int(random
.random() * 52)
329 chr2
= int(random
.random() * 52)
337 return "%c%c" % tuple(map(mkletter
, (chr1
, chr2
)))
341 def set_global_password(pw
, siteadmin
=True, scheme
=None):
343 scheme
= passwords
.Schemes
.ssha
345 filename
= config
.SITE_PW_FILE
347 filename
= config
.LISTCREATOR_PW_FILE
349 fp
= open(filename
, 'w')
350 print >> fp
, passwords
.make_secret(pw
, scheme
)
355 def get_global_password(siteadmin
=True):
357 filename
= config
.SITE_PW_FILE
359 filename
= config
.LISTCREATOR_PW_FILE
362 challenge
= fp
.read()[:-1] # strip off trailing nl
365 if e
.errno
<> errno
.ENOENT
:
367 # It's okay not to have a site admin password
def check_global_password(response, siteadmin=True):
    """Check `response' against the stored global password.

    The stored challenge is fetched with get_global_password(siteadmin).
    When there is none (None), None is returned, which callers testing
    truth see as a failed check.  Otherwise the result of
    passwords.check_response(challenge, response) is returned.
    """
    challenge = get_global_password(siteadmin)
    if challenge is None:
        # No password on file, so nothing can match.
        return None
    return passwords.check_response(challenge, response)
381 return cgi
.escape(s
, quote
=True)
385 parts
= s
.split(':', 1)
388 return parts
[0], int(parts
[1])
396 # Just changing these two functions should be enough to control the way
397 # that email address obscuring is handled.
def ObscureEmail(addr, for_text=False):
    """Make email address unrecognizable to web spiders, but invertable.

    When for_text option is set (not default), make a sentence fragment
    instead of a token: every `@' becomes ` at '.  Otherwise every `@'
    becomes `--at--', the form UnobscureEmail() can invert.
    """
    if for_text:
        return addr.replace('@', ' at ')
    return addr.replace('@', '--at--')
def UnobscureEmail(addr):
    """Undo the token form of ObscureEmail(): turn `--at--' back into `@'.

    An address that contains no `--at--' comes back unchanged, so routines
    expecting obscured addresses can be handed plain ones too.
    """
    pieces = addr.split('--at--')
    return '@'.join(pieces)
416 class OuterExit(Exception):
419 def findtext(templatefile
, dict=None, raw
=False, lang
=None, mlist
=None):
420 # Make some text from a template file. The order of searches depends on
421 # whether mlist and lang are provided. Once the templatefile is found,
422 # string substitution is performed by interpolation in `dict'. If `raw'
423 # is false, the resulting text is wrapped/filled by calling wrap().
425 # When looking for a template in a specific language, there are 4 places
426 # that are searched, in this order:
428 # 1. the list-specific language directory
429 # lists/<listname>/<language>
431 # 2. the domain-specific language directory
432 # templates/<list.host_name>/<language>
434 # 3. the site-wide language directory
435 # templates/site/<language>
437 # 4. the global default language directory
438 # templates/<language>
440 # The first match found stops the search. In this way, you can specialize
441 # templates at the desired level, or, if you use only the default
442 # templates, you don't need to change anything. You should never modify
443 # files in the templates/<language> subdirectory, since Mailman will
444 # overwrite these when you upgrade. That's what the templates/site
445 # language directories are for.
447 # A further complication is that the language to search for is determined
448 # by both the `lang' and `mlist' arguments. The search order there is
449 # that if lang is given, then the 4 locations above are searched,
450 # substituting lang for <language>. If no match is found, and mlist is
451 # given, then the 4 locations are searched using the list's preferred
452 # language. After that, the server default language is used for
453 # <language>. If that still doesn't yield a template, then the standard
454 # distribution's English language template is used as an ultimate
455 # fallback, and when lang is not 'en', the resulting template is passed
456 # through the translation service. If this template is missing you've got
459 # A word on backwards compatibility: Mailman versions prior to 2.1 stored
460 # templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}.
461 # Those directories are no longer searched so if you've got customizations
462 # in those files, you should move them to the appropriate directory based
463 # on the above description. Mailman's upgrade script cannot do this for
466 # The function has been revised and renamed as it now returns both the
467 # template text and the path from which it retrieved the template. The
468 # original function is now a wrapper which just returns the template text
469 # as before, by calling this renamed function and discarding the second
472 # Calculate the languages to scan
476 if mlist
is not None:
477 languages
.add(mlist
.preferred_language
)
478 languages
.add(config
.DEFAULT_SERVER_LANGUAGE
)
479 assert None not in languages
, 'None in languages'
480 # Calculate the locations to scan
482 if mlist
is not None:
483 searchdirs
.append(mlist
.data_path
)
484 searchdirs
.append(os
.path
.join(TEMPLATE_DIR
, mlist
.host_name
))
485 searchdirs
.append(os
.path
.join(TEMPLATE_DIR
, 'site'))
486 searchdirs
.append(TEMPLATE_DIR
)
490 for lang
in languages
:
491 for dir in searchdirs
:
492 filename
= os
.path
.join(dir, lang
, templatefile
)
497 if e
.errno
<> errno
.ENOENT
: raise
498 # Okay, it doesn't exist, keep looping
503 # Try one last time with the distro English template, which, unless
504 # you've got a really broken installation, must be there.
506 filename
= os
.path
.join(TEMPLATE_DIR
, 'en', templatefile
)
509 if e
.errno
<> errno
.ENOENT
:
511 # We never found the template. BAD!
512 raise IOError(errno
.ENOENT
, 'No template file found', templatefile
)
514 from mailman
.i18n
import get_translation
516 data
= fp
.read()[:-1]
517 template
= get_translation().ugettext(data
)
522 template
= unicode(template
, GetCharSet(lang
), 'replace')
526 text
= Template(template
).safe_substitute(**dict)
527 except (TypeError, ValueError):
528 # The template is really screwed up
529 log
.exception('broken template: %s', filename
)
531 return text
, filename
532 return wrap(text
), filename
def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
    """Return only the text produced by findtext().

    All arguments are passed straight through to findtext(); the file name
    that findtext() returns alongside the text is discarded.
    """
    text, _filename = findtext(templatefile, dict, raw, lang, mlist)
    return text
540 def GetRequestURI(fallback
=None, escape
=True):
541 """Return the full virtual path this CGI script was invoked with.
543 Newer web servers seems to supply this info in the REQUEST_URI
544 environment variable -- which isn't part of the CGI/1.1 spec.
545 Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME
546 and PATH_INFO, both of which are part of CGI/1.1.
548 Optional argument `fallback' (default `None') is returned if both of
549 the above methods fail.
551 The url will be cgi escaped to prevent cross-site scripting attacks,
552 unless `escape' is set to 0.
555 if 'REQUEST_URI' in os
.environ
:
556 url
= os
.environ
['REQUEST_URI']
557 elif 'SCRIPT_NAME' in os
.environ
and 'PATH_INFO' in os
.environ
:
558 url
= os
.environ
['SCRIPT_NAME'] + os
.environ
['PATH_INFO']
565 def makedirs(path
, mode
=02775):
569 os
.makedirs(path
, mode
)
573 # Ignore the exceptions if the directory already exists
574 if e
.errno
<> errno
.EEXIST
:
579 # XXX Replace this with direct calls. For now, existing uses of GetCharSet()
580 # are too numerous to change.
def GetCharSet(lang):
    """Return what config.languages.get_charset() reports for `lang'."""
    languages = config.languages
    return languages.get_charset(lang)
586 def get_request_domain():
587 host
= os
.environ
.get('HTTP_HOST', os
.environ
.get('SERVER_NAME'))
588 port
= os
.environ
.get('SERVER_PORT')
589 # Strip off the port if there is one
590 if port
and host
.endswith(':' + port
):
591 host
= host
[:-len(port
)-1]
def get_site_noreply():
    """Return the site's no-reply address.

    The address is config.NO_REPLY_ADDRESS and config.DEFAULT_EMAIL_HOST
    joined by `@'.
    """
    local_part = config.NO_REPLY_ADDRESS
    host = config.DEFAULT_EMAIL_HOST
    return '%s@%s' % (local_part, host)
600 # Figure out epoch seconds of midnight at the start of today (or of the given
601 # 3-tuple date of (year, month, day)).
def midnight(date=None):
    """Return the epoch seconds of local midnight at the start of a day.

    `date' is a (year, month, day) sequence; when omitted (None), today's
    local date is used.  The value comes from time.mktime(), so it is
    expressed in local time.
    """
    # Only default to today when the caller did not supply a date.
    if date is None:
        date = time.localtime()[:3]
    # Pad the date with five zeroed fields (hour, minute, second, weekday,
    # yearday); -1 for dst flag tells the library to figure it out.
    return time.mktime(tuple(date) + (0,)*5 + (-1,))
610 # Utilities to convert from simplified $identifier substitutions to/from
611 # standard Python %(identifier)s substitutions. The "Guido rules" for the
614 # $identifier -> %(identifier)s
615 # ${identifier} -> %(identifier)s
618 """Convert from %-strings to $-strings."""
619 s
= s
.replace('$', '$$').replace('%%', '%')
621 for i
in range(1, len(parts
), 2):
622 if parts
[i
+1] and parts
[i
+1][0] in IDENTCHARS
:
623 parts
[i
] = '${' + parts
[i
] + '}'
625 parts
[i
] = '$' + parts
[i
]
626 return EMPTYSTRING
.join(parts
)
630 """Convert from $-strings to %-strings."""
631 s
= s
.replace('%', '%%').replace('$$', '$')
633 for i
in range(1, len(parts
), 4):
634 if parts
[i
] is not None:
636 elif parts
[i
+1] is not None:
637 parts
[i
+1] = '%(' + parts
[i
+1] + ')s'
639 parts
[i
+2] = '%(' + parts
[i
+2] + ')s'
640 return EMPTYSTRING
.join(filter(None, parts
))
643 def dollar_identifiers(s
):
644 """Return the set (dictionary) of identifiers found in a $-string."""
646 for name
in filter(None, [b
or c
or None for a
, b
, c
in dre
.findall(s
)]):
651 def percent_identifiers(s
):
652 """Return the set (dictionary) of identifiers found in a %-string."""
654 for name
in cre
.findall(s
):
660 # Utilities to canonicalize a string, which means un-HTML-ifying the string to
661 # produce a Unicode string or an 8-bit string if all the characters are ASCII.
662 def canonstr(s
, lang
=None):
664 parts
= re
.split(r
'&(?P<ref>[^;]+);', s
)
667 newparts
.append(chr(i
))
669 newparts
.append(unichr(i
))
671 newparts
.append(parts
.pop(0))
675 if ref
.startswith('#'):
679 # Non-convertable, stick with what we got
680 newparts
.append('&'+ref
+';')
682 c
= htmlentitydefs
.entitydefs
.get(ref
, '?')
683 if c
.startswith('#') and c
.endswith(';'):
684 appchr(int(ref
[1:-1]))
687 newstr
= EMPTYSTRING
.join(newparts
)
688 if isinstance(newstr
, unicode):
690 # We want the default fallback to be iso-8859-1 even if the language is
691 # English (us-ascii). This seems like a practical compromise so that
692 # non-ASCII characters in names can be used in English lists w/o having to
693 # change the global charset for English from us-ascii (which I
694 # superstitiously think may have unintended consequences).
696 charset
= 'iso-8859-1'
698 charset
= GetCharSet(lang
)
699 if charset
== 'us-ascii':
700 charset
= 'iso-8859-1'
701 return unicode(newstr
, charset
, 'replace')
704 # The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
705 # charset of the given language, which is the character set that the page will
706 # be rendered in, and failing that, replaces non-ASCII characters with their
707 # html references. It always returns a byte string.
708 def uncanonstr(s
, lang
=None):
714 charset
= GetCharSet(lang
)
715 # See if the string contains characters only in the desired character
716 # set. If so, return it unchanged, except for coercing it to a byte
719 if isinstance(s
, unicode):
720 return s
.encode(charset
)
722 u
= unicode(s
, charset
)
725 # Nope, it contains funny characters, so html-ref it
734 a
.append('&#%3d;' % o
)
737 # Join characters together and coerce to byte string
738 return str(EMPTYSTRING
.join(a
))
741 def oneline(s
, cset
='us-ascii', in_unicode
=False):
742 # Decode header string in one line and convert into specified charset
744 h
= email
.Header
.make_header(email
.Header
.decode_header(s
))
745 ustr
= h
.__unicode
__()
746 line
= UEMPTYSTRING
.join(ustr
.splitlines())
750 return line
.encode(cset
, 'replace')
751 except (LookupError, UnicodeError, ValueError, HeaderParseError
):
752 # possibly charset problem. return with undecoded string in one line.
753 return EMPTYSTRING
.join(s
.splitlines())
756 def strip_verbose_pattern(pattern
):
757 # Remove white space and comments from a verbose pattern and return a
758 # non-verbose, equivalent pattern. Replace CR and NL in the result
759 # with '\\r' and '\\n' respectively to avoid multi-line results.
760 if not isinstance(pattern
, str):
767 while i
< len(pattern
):
780 elif c
== '#' and not inclass
:
782 elif c
== '[' and not inclass
:
786 elif c
== ']' and inclass
:
789 elif re
.search('\s', c
):
797 elif c
== '\\' and not inclass
:
812 def get_pattern(email
, pattern_list
):
813 """Returns matched entry in pattern_list if email matches.
814 Otherwise returns None.
819 for pattern
in pattern_list
:
820 if pattern
.startswith('^'):
821 # This is a regular expression match
823 if re
.search(pattern
, email
, re
.IGNORECASE
):
827 # BAW: we should probably remove this pattern
830 # Do the comparison case insensitively
831 if pattern
.lower() == email
.lower():