mailman/Utils.py

   1 # Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
   2 #
   3 # This file is part of GNU Mailman.
   4 #
   5 # GNU Mailman is free software: you can redistribute it and/or modify it under
   6 # the terms of the GNU General Public License as published by the Free
   7 # Software Foundation, either version 3 of the License, or (at your option)
   8 # any later version.
   9 #
  10 # GNU Mailman is distributed in the hope that it will be useful, but WITHOUT
  11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 # more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along with
  16 # GNU Mailman.  If not, see <http://www.gnu.org/licenses/>.
  17
  18 """Miscellaneous essential routines.
  19
  20 This includes actual message transmission routines, address checking and
  21 message and address munging, a handy-dandy routine to map a function on all
  22 the mailing lists, and whatever else doesn't belong elsewhere.
  23 """
  24
  25 import os
  26 import re
  27 import cgi
  28 import time
  29 import errno
  30 import base64
  31 import random
  32 import logging
  33 import urlparse
  34 import htmlentitydefs
  35 import email.Header
  36 import email.Iterators
  37
  38 from email.Errors import HeaderParseError
  39 from string import ascii_letters, digits, whitespace, Template
  40
  41 import mailman.templates
  42
  43 from mailman import passwords
  44 from mailman.configuration import config
  45 from mailman.core import errors
  46
# Frequently used string constants.
AT = '@'
CR = '\r'
DOT = '.'
EMPTYSTRING = ''
# Characters that may appear in an identifier (see to_dollar()).
IDENTCHARS = ascii_letters + digits + '_'
NL = '\n'
UEMPTYSTRING = u''
# Directory in which the mailman.templates package lives.
TEMPLATE_DIR = os.path.dirname(mailman.templates.__file__)

# Search for %(identifier)s strings, except that the trailing s is optional,
# since that's a common mistake
cre = re.compile(r'%\(([_a-z]\w*?)\)s?', re.IGNORECASE)
# Search for $$, $identifier, or ${identifier}
dre = re.compile(r'(\${2})|\$([_a-z]\w*)|\${([_a-z]\w*)}', re.IGNORECASE)

# Logger used by the routines in this module to report errors.
log = logging.getLogger('mailman.error')
  63
  64
  65 \f
  66 def list_exists(fqdn_listname):
  67     """Return true iff list `fqdn_listname' exists."""
  68     return config.db.list_manager.get(fqdn_listname) is not None
  69
  70
  71 def list_names():
  72     """Return the fqdn names of all lists in default list directory."""
  73     return ['%s@%s' % (listname, hostname)
  74             for listname, hostname in config.db.list_manager.get_list_names()]
  75
  76
  77 def split_listname(listname):
  78     if AT in listname:
  79         return listname.split(AT, 1)
  80     return listname, config.DEFAULT_EMAIL_HOST
  81
  82
  83 def fqdn_listname(listname, hostname=None):
  84     if hostname is None:
  85         return AT.join(split_listname(listname))
  86     return AT.join((listname, hostname))
  87
  88
  89 \f
# a much more naive implementation than say, Emacs's fill-paragraph!
def wrap(text, column=70, honor_leading_ws=True):
    """Wrap and fill the text to the specified column.

    Wrapping is always in effect, although if it is not possible to wrap a
    line (because some word is longer than `column' characters) the line is
    broken at the next available whitespace boundary.  Paragraphs are also
    always filled, unless honor_leading_ws is true and the line begins with
    whitespace.  This is the algorithm that the Python FAQ wizard uses, and
    seems like a good compromise.

    `text' is the string to wrap, `column' is the target line width, and
    `honor_leading_ws' controls whether lines starting with whitespace are
    exempt from filling.  The wrapped text is returned as a single string.
    """
    wrapped = ''
    # first split the text into paragraphs, defined as a blank line
    paras = re.split('\n\n', text)
    for para in paras:
        # fill: join consecutive fillable lines into one long logical line
        lines = []
        fillprev = False
        for line in para.split(NL):
            if not line:
                lines.append(line)
                continue
            if honor_leading_ws and line[0] in whitespace:
                fillthis = False
            else:
                fillthis = True
            if fillprev and fillthis:
                # if the previous line should be filled, then just append a
                # single space, and the rest of the current line
                lines[-1] = lines[-1].rstrip() + ' ' + line
            else:
                # no fill, i.e. retain newline
                lines.append(line)
            fillprev = fillthis
        # wrap each line; note that this loop rebinds the `text' argument
        for text in lines:
            while text:
                if len(text) <= column:
                    # the remainder fits, so emit it as is
                    line = text
                    text = ''
                else:
                    bol = column
                    # find the last whitespace character
                    while bol > 0 and text[bol] not in whitespace:
                        bol -= 1
                    # now find the last non-whitespace character
                    eol = bol
                    while eol > 0 and text[eol] in whitespace:
                        eol -= 1
                    # watch out for text that's longer than the column width
                    if eol == 0:
                        # break on whitespace after column
                        eol = column
                        while eol < len(text) and text[eol] not in whitespace:
                            eol += 1
                        bol = eol
                        while bol < len(text) and text[bol] in whitespace:
                            bol += 1
                        bol -= 1
                    line = text[:eol+1] + '\n'
                    # find the next non-whitespace character
                    bol += 1
                    while bol < len(text) and text[bol] in whitespace:
                        bol += 1
                    text = text[bol:]
                wrapped += line
            # end while text: terminate the logical line just emitted
            wrapped += '\n'
        # end for text in lines: separate paragraphs with a blank line
        wrapped += '\n'
    # the last two newlines are bogus
    return wrapped[:-2]
 163
 164
 165 \f
 166 def QuotePeriods(text):
 167     JOINER = '\n .\n'
 168     SEP = '\n.\n'
 169     return JOINER.join(text.split(SEP))
 170
 171
 172 # This takes an email address, and returns a tuple containing (user,host)
 173 def ParseEmail(email):
 174     user = None
 175     domain = None
 176     email = email.lower()
 177     at_sign = email.find('@')
 178     if at_sign < 1:
 179         return email, None
 180     user = email[:at_sign]
 181     rest = email[at_sign+1:]
 182     domain = rest.split('.')
 183     return user, domain
 184
 185
 186 def LCDomain(addr):
 187     "returns the address with the domain part lowercased"
 188     atind = addr.find('@')
 189     if atind == -1: # no domain part
 190         return addr
 191     return addr[:atind] + '@' + addr[atind+1:].lower()
 192
 193
 194 # TBD: what other characters should be disallowed?
 195 _badchars = re.compile(r'[][()<>|;^,\000-\037\177-\377]')
 196
 197 def ValidateEmail(s):
 198     """Verify that the an email address isn't grossly evil."""
 199     # Pretty minimal, cheesy check.  We could do better...
 200     if not s or ' ' in s:
 201         raise errors.InvalidEmailAddress(repr(s))
 202     if _badchars.search(s) or s[0] == '-':
 203         raise errors.InvalidEmailAddress(repr(s))
 204     user, domain_parts = ParseEmail(s)
 205     # Local, unqualified addresses are not allowed.
 206     if not domain_parts:
 207         raise errors.InvalidEmailAddress(repr(s))
 208     if len(domain_parts) < 2:
 209         raise errors.InvalidEmailAddress(repr(s))
 210
 211
 212 \f
 213 # Patterns which may be used to form malicious path to inject a new
 214 # line in the mailman error log. (TK: advisory by Moritz Naumann)
 215 CRNLpat = re.compile(r'[^\x21-\x7e]')
 216
 217 def GetPathPieces(envar='PATH_INFO'):
 218     path = os.environ.get(envar)
 219     if path:
 220         if CRNLpat.search(path):
 221             path = CRNLpat.split(path)[0]
 222             log.error('Warning: Possible malformed path attack.')
 223         return [p for p in path.split('/') if p]
 224     return []
 225
 226
 227 \f
 228 def ScriptURL(target):
 229     up = '../' * len(GetPathPieces())
 230     return '%s%s' % (up, target + config.CGIEXT)
 231
 232
 233 \f
 234 def GetPossibleMatchingAddrs(name):
 235     """returns a sorted list of addresses that could possibly match
 236     a given name.
 237
 238     For Example, given scott@pobox.com, return ['scott@pobox.com'],
 239     given scott@blackbox.pobox.com return ['scott@blackbox.pobox.com',
 240                                            'scott@pobox.com']"""
 241
 242     name = name.lower()
 243     user, domain = ParseEmail(name)
 244     res = [name]
 245     if domain:
 246         domain = domain[1:]
 247         while len(domain) >= 2:
 248             res.append("%s@%s" % (user, DOT.join(domain)))
 249             domain = domain[1:]
 250     return res
 251
 252
 253 \f
 254 def List2Dict(L, foldcase=False):
 255     """Return a dict keyed by the entries in the list passed to it."""
 256     d = {}
 257     if foldcase:
 258         for i in L:
 259             d[i.lower()] = True
 260     else:
 261         for i in L:
 262             d[i] = True
 263     return d
 264
 265
 266 \f
 267 _vowels = ('a', 'e', 'i', 'o', 'u')
 268 _consonants = ('b', 'c', 'd', 'f', 'g', 'h', 'k', 'm', 'n',
 269                'p', 'r', 's', 't', 'v', 'w', 'x', 'z')
 270 _syllables = []
 271
 272 for v in _vowels:
 273     for c in _consonants:
 274         _syllables.append(c+v)
 275         _syllables.append(v+c)
 276 del c, v
 277
 278 def UserFriendly_MakeRandomPassword(length):
 279     syls = []
 280     while len(syls) * 2 < length:
 281         syls.append(random.choice(_syllables))
 282     return EMPTYSTRING.join(syls)[:length]
 283
 284
def Secure_MakeRandomPassword(length):
    """Return a `length' character password built from random bytes.

    Random bytes are taken from os.urandom() or, when that raises
    AttributeError or NotImplementedError, read from /dev/urandom.  The bytes
    are base64 encoded, newlines are removed and the result is truncated to
    `length' characters.

    If /dev/urandom does not exist either, an error is logged and the result
    of UserFriendly_MakeRandomPassword(length) is returned instead.  Any
    other OSError from opening /dev/urandom is propagated.
    """
    bytesread = 0
    bytes = []
    # File descriptor for /dev/urandom; only opened if os.urandom() is
    # unusable, and closed in the finally clause below.
    fd = None
    try:
        while bytesread < length:
            try:
                # Python 2.4 has this on available systems.
                newbytes = os.urandom(length - bytesread)
            except (AttributeError, NotImplementedError):
                if fd is None:
                    try:
                        fd = os.open('/dev/urandom', os.O_RDONLY)
                    except OSError, e:
                        if e.errno <> errno.ENOENT:
                            raise
                        # We have no available source of cryptographically
                        # secure random characters.  Log an error and fallback
                        # to the user friendly passwords.
                        log.error('urandom not available, passwords not secure')
                        return UserFriendly_MakeRandomPassword(length)
                # A read may return fewer bytes than requested, hence the
                # enclosing loop.
                newbytes = os.read(fd, length - bytesread)
            bytes.append(newbytes)
            bytesread += len(newbytes)
        s = base64.encodestring(EMPTYSTRING.join(bytes))
        # base64 will expand the string by 4/3rds
        return s.replace('\n', '')[:length]
    finally:
        if fd is not None:
            os.close(fd)
 315
 316
 317 def MakeRandomPassword(length=None):
 318     if length is None:
 319         length = config.MEMBER_PASSWORD_LENGTH
 320     if config.USER_FRIENDLY_PASSWORDS:
 321         password = UserFriendly_MakeRandomPassword(length)
 322     else:
 323         password = Secure_MakeRandomPassword(length)
 324     return password.decode('ascii')
 325
 326
 327 def GetRandomSeed():
 328     chr1 = int(random.random() * 52)
 329     chr2 = int(random.random() * 52)
 330     def mkletter(c):
 331         if 0 <= c < 26:
 332             c += 65
 333         if 26 <= c < 52:
 334             #c = c - 26 + 97
 335             c += 71
 336         return c
 337     return "%c%c" % tuple(map(mkletter, (chr1, chr2)))
 338
 339
 340 \f
 341 def set_global_password(pw, siteadmin=True, scheme=None):
 342     if scheme is None:
 343         scheme = passwords.Schemes.ssha
 344     if siteadmin:
 345         filename = config.SITE_PW_FILE
 346     else:
 347         filename = config.LISTCREATOR_PW_FILE
 348     try:
 349         fp = open(filename, 'w')
 350         print >> fp, passwords.make_secret(pw, scheme)
 351     finally:
 352         fp.close()
 353
 354
 355 def get_global_password(siteadmin=True):
 356     if siteadmin:
 357         filename = config.SITE_PW_FILE
 358     else:
 359         filename = config.LISTCREATOR_PW_FILE
 360     try:
 361         fp = open(filename)
 362         challenge = fp.read()[:-1]                # strip off trailing nl
 363         fp.close()
 364     except IOError, e:
 365         if e.errno <> errno.ENOENT:
 366             raise
 367         # It's okay not to have a site admin password
 368         return None
 369     return challenge
 370
 371
 372 def check_global_password(response, siteadmin=True):
 373     challenge = get_global_password(siteadmin)
 374     if challenge is None:
 375         return False
 376     return passwords.check_response(challenge, response)
 377
 378
 379 \f
def websafe(s):
    """Return `s' HTML-escaped with cgi.escape(), quote characters included."""
    return cgi.escape(s, quote=True)
 382
 383
 384 def nntpsplit(s):
 385     parts = s.split(':', 1)
 386     if len(parts) == 2:
 387         try:
 388             return parts[0], int(parts[1])
 389         except ValueError:
 390             pass
 391     # Use the defaults
 392     return s, 119
 393
 394
 395 \f
 396 # Just changing these two functions should be enough to control the way
 397 # that email address obscuring is handled.
 398 def ObscureEmail(addr, for_text=False):
 399     """Make email address unrecognizable to web spiders, but invertable.
 400
 401     When for_text option is set (not default), make a sentence fragment
 402     instead of a token."""
 403     if for_text:
 404         return addr.replace('@', ' at ')
 405     else:
 406         return addr.replace('@', '--at--')
 407
 408 def UnobscureEmail(addr):
 409     """Invert ObscureEmail() conversion."""
 410     # Contrived to act as an identity operation on already-unobscured
 411     # emails, so routines expecting obscured ones will accept both.
 412     return addr.replace('--at--', '@')
 413
 414
 415 \f
class OuterExit(Exception):
    """Control-flow exception for breaking out of nested loops."""
    pass
 418
 419 def findtext(templatefile, dict=None, raw=False, lang=None, mlist=None):
 420     # Make some text from a template file.  The order of searches depends on
 421     # whether mlist and lang are provided.  Once the templatefile is found,
 422     # string substitution is performed by interpolation in `dict'.  If `raw'
 423     # is false, the resulting text is wrapped/filled by calling wrap().
 424     #
 425     # When looking for a template in a specific language, there are 4 places
 426     # that are searched, in this order:
 427     #
 428     # 1. the list-specific language directory
 429     #    lists/<listname>/<language>
 430     #
 431     # 2. the domain-specific language directory
 432     #    templates/<list.host_name>/<language>
 433     #
 434     # 3. the site-wide language directory
 435     #    templates/site/<language>
 436     #
 437     # 4. the global default language directory
 438     #    templates/<language>
 439     #
 440     # The first match found stops the search.  In this way, you can specialize
 441     # templates at the desired level, or, if you use only the default
 442     # templates, you don't need to change anything.  You should never modify
 443     # files in the templates/<language> subdirectory, since Mailman will
 444     # overwrite these when you upgrade.  That's what the templates/site
 445     # language directories are for.
 446     #
 447     # A further complication is that the language to search for is determined
 448     # by both the `lang' and `mlist' arguments.  The search order there is
 449     # that if lang is given, then the 4 locations above are searched,
 450     # substituting lang for <language>.  If no match is found, and mlist is
 451     # given, then the 4 locations are searched using the list's preferred
 452     # language.  After that, the server default language is used for
 453     # <language>.  If that still doesn't yield a template, then the standard
 454     # distribution's English language template is used as an ultimate
 455     # fallback, and when lang is not 'en', the resulting template is passed
 456     # through the translation service.  If this template is missing you've got
 457     # big problems. ;)
 458     #
 459     # A word on backwards compatibility: Mailman versions prior to 2.1 stored
 460     # templates in templates/*.{html,txt} and lists/<listname>/*.{html,txt}.
 461     # Those directories are no longer searched so if you've got customizations
 462     # in those files, you should move them to the appropriate directory based
 463     # on the above description.  Mailman's upgrade script cannot do this for
 464     # you.
 465     #
 466     # The function has been revised and renamed as it now returns both the
 467     # template text and the path from which it retrieved the template. The
 468     # original function is now a wrapper which just returns the template text
 469     # as before, by calling this renamed function and discarding the second
 470     # item returned.
 471     #
 472     # Calculate the languages to scan
 473     languages = set()
 474     if lang is not None:
 475         languages.add(lang)
 476     if mlist is not None:
 477         languages.add(mlist.preferred_language)
 478     languages.add(config.DEFAULT_SERVER_LANGUAGE)
 479     assert None not in languages, 'None in languages'
 480     # Calculate the locations to scan
 481     searchdirs = []
 482     if mlist is not None:
 483         searchdirs.append(mlist.data_path)
 484         searchdirs.append(os.path.join(TEMPLATE_DIR, mlist.host_name))
 485     searchdirs.append(os.path.join(TEMPLATE_DIR, 'site'))
 486     searchdirs.append(TEMPLATE_DIR)
 487     # Start scanning
 488     fp = None
 489     try:
 490         for lang in languages:
 491             for dir in searchdirs:
 492                 filename = os.path.join(dir, lang, templatefile)
 493                 try:
 494                     fp = open(filename)
 495                     raise OuterExit
 496                 except IOError, e:
 497                     if e.errno <> errno.ENOENT: raise
 498                     # Okay, it doesn't exist, keep looping
 499                     fp = None
 500     except OuterExit:
 501         pass
 502     if fp is None:
 503         # Try one last time with the distro English template, which, unless
 504         # you've got a really broken installation, must be there.
 505         try:
 506             filename = os.path.join(TEMPLATE_DIR, 'en', templatefile)
 507             fp = open(filename)
 508         except IOError, e:
 509             if e.errno <> errno.ENOENT:
 510                 raise
 511             # We never found the template.  BAD!
 512             raise IOError(errno.ENOENT, 'No template file found', templatefile)
 513         else:
 514             from mailman.i18n import get_translation
 515             # XXX BROKEN HACK
 516             data = fp.read()[:-1]
 517             template = get_translation().ugettext(data)
 518             fp.close()
 519     else:
 520         template = fp.read()
 521         fp.close()
 522         template = unicode(template, GetCharSet(lang), 'replace')
 523     text = template
 524     if dict is not None:
 525         try:
 526             text = Template(template).safe_substitute(**dict)
 527         except (TypeError, ValueError):
 528             # The template is really screwed up
 529             log.exception('broken template: %s', filename)
 530     if raw:
 531         return text, filename
 532     return wrap(text), filename
 533
 534
 535 def maketext(templatefile, dict=None, raw=False, lang=None, mlist=None):
 536     return findtext(templatefile, dict, raw, lang, mlist)[0]
 537
 538
 539 \f
 540 def GetRequestURI(fallback=None, escape=True):
 541     """Return the full virtual path this CGI script was invoked with.
 542
 543     Newer web servers seems to supply this info in the REQUEST_URI
 544     environment variable -- which isn't part of the CGI/1.1 spec.
 545     Thus, if REQUEST_URI isn't available, we concatenate SCRIPT_NAME
 546     and PATH_INFO, both of which are part of CGI/1.1.
 547
 548     Optional argument `fallback' (default `None') is returned if both of
 549     the above methods fail.
 550
 551     The url will be cgi escaped to prevent cross-site scripting attacks,
 552     unless `escape' is set to 0.
 553     """
 554     url = fallback
 555     if 'REQUEST_URI' in os.environ:
 556         url = os.environ['REQUEST_URI']
 557     elif 'SCRIPT_NAME' in os.environ and 'PATH_INFO' in os.environ:
 558         url = os.environ['SCRIPT_NAME'] + os.environ['PATH_INFO']
 559     if escape:
 560         return websafe(url)
 561     return url
 562
 563
 564 \f
 565 def makedirs(path, mode=02775):
 566     try:
 567         omask = os.umask(0)
 568         try:
 569             os.makedirs(path, mode)
 570         finally:
 571             os.umask(omask)
 572     except OSError, e:
 573         # Ignore the exceptions if the directory already exists
 574         if e.errno <> errno.EEXIST:
 575             raise
 576
 577
 578 \f
# XXX Replace this with direct calls.  For now, existing uses of GetCharSet()
# are too numerous to change.
def GetCharSet(lang):
    """Return config.languages.get_charset(lang), the charset for `lang'."""
    return config.languages.get_charset(lang)
 583
 584
 585 \f
 586 def get_request_domain():
 587     host = os.environ.get('HTTP_HOST', os.environ.get('SERVER_NAME'))
 588     port = os.environ.get('SERVER_PORT')
 589     # Strip off the port if there is one
 590     if port and host.endswith(':' + port):
 591         host = host[:-len(port)-1]
 592     return host.lower()
 593
 594
def get_site_noreply():
    """Return the site's no-reply address.

    It is formed as config.NO_REPLY_ADDRESS@config.DEFAULT_EMAIL_HOST.
    """
    return '%s@%s' % (config.NO_REPLY_ADDRESS, config.DEFAULT_EMAIL_HOST)
 597
 598
 599 \f
 600 # Figure out epoch seconds of midnight at the start of today (or the given
 601 # 3-tuple date of (year, month, day).
 602 def midnight(date=None):
 603     if date is None:
 604         date = time.localtime()[:3]
 605     # -1 for dst flag tells the library to figure it out
 606     return time.mktime(date + (0,)*5 + (-1,))
 607
 608
 609 \f
# Utilities to convert from simplified $identifier substitutions to/from
# standard Python %(identifier)s substitutions.  The "Guido rules" for the
# former are:
#    $$ -> $
#    $identifier -> %(identifier)s
#    ${identifier} -> %(identifier)s
 616
 617 def to_dollar(s):
 618     """Convert from %-strings to $-strings."""
 619     s = s.replace('$', '$$').replace('%%', '%')
 620     parts = cre.split(s)
 621     for i in range(1, len(parts), 2):
 622         if parts[i+1] and parts[i+1][0] in IDENTCHARS:
 623             parts[i] = '${' + parts[i] + '}'
 624         else:
 625             parts[i] = '$' + parts[i]
 626     return EMPTYSTRING.join(parts)
 627
 628
 629 def to_percent(s):
 630     """Convert from $-strings to %-strings."""
 631     s = s.replace('%', '%%').replace('$$', '$')
 632     parts = dre.split(s)
 633     for i in range(1, len(parts), 4):
 634         if parts[i] is not None:
 635             parts[i] = '$'
 636         elif parts[i+1] is not None:
 637             parts[i+1] = '%(' + parts[i+1] + ')s'
 638         else:
 639             parts[i+2] = '%(' + parts[i+2] + ')s'
 640     return EMPTYSTRING.join(filter(None, parts))
 641
 642
 643 def dollar_identifiers(s):
 644     """Return the set (dictionary) of identifiers found in a $-string."""
 645     d = {}
 646     for name in filter(None, [b or c or None for a, b, c in dre.findall(s)]):
 647         d[name] = True
 648     return d
 649
 650
 651 def percent_identifiers(s):
 652     """Return the set (dictionary) of identifiers found in a %-string."""
 653     d = {}
 654     for name in cre.findall(s):
 655         d[name] = True
 656     return d
 657
 658
 659 \f
 660 # Utilities to canonicalize a string, which means un-HTML-ifying the string to
 661 # produce a Unicode string or an 8-bit string if all the characters are ASCII.
 662 def canonstr(s, lang=None):
 663     newparts = []
 664     parts = re.split(r'&(?P<ref>[^;]+);', s)
 665     def appchr(i):
 666         if i < 256:
 667             newparts.append(chr(i))
 668         else:
 669             newparts.append(unichr(i))
 670     while True:
 671         newparts.append(parts.pop(0))
 672         if not parts:
 673             break
 674         ref = parts.pop(0)
 675         if ref.startswith('#'):
 676             try:
 677                 appchr(int(ref[1:]))
 678             except ValueError:
 679                 # Non-convertable, stick with what we got
 680                 newparts.append('&'+ref+';')
 681         else:
 682             c = htmlentitydefs.entitydefs.get(ref, '?')
 683             if c.startswith('#') and c.endswith(';'):
 684                 appchr(int(ref[1:-1]))
 685             else:
 686                 newparts.append(c)
 687     newstr = EMPTYSTRING.join(newparts)
 688     if isinstance(newstr, unicode):
 689         return newstr
 690     # We want the default fallback to be iso-8859-1 even if the language is
 691     # English (us-ascii).  This seems like a practical compromise so that
 692     # non-ASCII characters in names can be used in English lists w/o having to
 693     # change the global charset for English from us-ascii (which I
 694     # superstitiously think may have unintended consequences).
 695     if lang is None:
 696         charset = 'iso-8859-1'
 697     else:
 698         charset = GetCharSet(lang)
 699         if charset == 'us-ascii':
 700             charset = 'iso-8859-1'
 701     return unicode(newstr, charset, 'replace')
 702
 703
 704 # The opposite of canonstr() -- sorta.  I.e. it attempts to encode s in the
 705 # charset of the given language, which is the character set that the page will
 706 # be rendered in, and failing that, replaces non-ASCII characters with their
 707 # html references.  It always returns a byte string.
 708 def uncanonstr(s, lang=None):
 709     if s is None:
 710         s = u''
 711     if lang is None:
 712         charset = 'us-ascii'
 713     else:
 714         charset = GetCharSet(lang)
 715     # See if the string contains characters only in the desired character
 716     # set.  If so, return it unchanged, except for coercing it to a byte
 717     # string.
 718     try:
 719         if isinstance(s, unicode):
 720             return s.encode(charset)
 721         else:
 722             u = unicode(s, charset)
 723             return s
 724     except UnicodeError:
 725         # Nope, it contains funny characters, so html-ref it
 726         return uquote(s)
 727
 728
 729 def uquote(s):
 730     a = []
 731     for c in s:
 732         o = ord(c)
 733         if o > 127:
 734             a.append('&#%3d;' % o)
 735         else:
 736             a.append(c)
 737     # Join characters together and coerce to byte string
 738     return str(EMPTYSTRING.join(a))
 739
 740
 741 def oneline(s, cset='us-ascii', in_unicode=False):
 742     # Decode header string in one line and convert into specified charset
 743     try:
 744         h = email.Header.make_header(email.Header.decode_header(s))
 745         ustr = h.__unicode__()
 746         line = UEMPTYSTRING.join(ustr.splitlines())
 747         if in_unicode:
 748             return line
 749         else:
 750             return line.encode(cset, 'replace')
 751     except (LookupError, UnicodeError, ValueError, HeaderParseError):
 752         # possibly charset problem. return with undecoded string in one line.
 753         return EMPTYSTRING.join(s.splitlines())
 754
 755
def strip_verbose_pattern(pattern):
    """Return a non-verbose equivalent of the verbose regexp `pattern'.

    Anything that is not a `str' instance is returned unchanged.
    """
    # Remove white space and comments from a verbose pattern and return a
    # non-verbose, equivalent pattern.  Replace CR and NL in the result
    # with '\\r' and '\\n' respectively to avoid multi-line results.
    if not isinstance(pattern, str):
        return pattern
    newpattern = ''
    i = 0
    # True while scanning inside a [...] character class.
    inclass = False
    # True while skipping a comment, i.e. until the next newline.
    skiptoeol = False
    # True when the next character must be copied without interpretation.
    copynext = False
    while i < len(pattern):
        c = pattern[i]
        if copynext:
            if c == NL:
                newpattern += '\\n'
            elif c == CR:
                newpattern += '\\r'
            else:
                newpattern += c
            copynext = False
        elif skiptoeol:
            if c == NL:
                skiptoeol = False
        elif c == '#' and not inclass:
            # Start of a comment
            skiptoeol = True
        elif c == '[' and not inclass:
            inclass = True
            newpattern += c
            # Copy the character after '[' verbatim, so that a ']' in that
            # position does not end the class.
            copynext = True
        elif c == ']' and inclass:
            inclass = False
            newpattern += c
        elif re.search('\s', c):
            # White space is kept only inside a character class.
            if inclass:
                if c == NL:
                    newpattern += '\\n'
                elif c == CR:
                    newpattern += '\\r'
                else:
                    newpattern += c
        elif c == '\\' and not inclass:
            # Keep the backslash together with the character it escapes.
            newpattern += c
            copynext = True
        else:
            if c == NL:
                newpattern += '\\n'
            elif c == CR:
                newpattern += '\\r'
            else:
                newpattern += c
        i += 1
    return newpattern
 809
 810
 811 \f
 812 def get_pattern(email, pattern_list):
 813     """Returns matched entry in pattern_list if email matches.
 814     Otherwise returns None.
 815     """
 816     if not pattern_list:
 817         return None
 818     matched = None
 819     for pattern in pattern_list:
 820         if pattern.startswith('^'):
 821             # This is a regular expression match
 822             try:
 823                 if re.search(pattern, email, re.IGNORECASE):
 824                     matched = pattern
 825                     break
 826             except re.error:
 827                 # BAW: we should probably remove this pattern
 828                 pass
 829         else:
 830             # Do the comparison case insensitively
 831             if pattern.lower() == email.lower():
 832                 matched = pattern
 833                 break
 834     return matched