src/Comic/Fetch.py

   1 #!/usr/bin/python
   2 ###########################################################################
   3 #    Copyright (C) 2007 by Andrew Mahone
   4 #    <andrew.mahone@gmail.com>
   5 #
   6 # Copyright: See COPYING file that comes with this distribution
   7 #
   8 ###########################################################################
   9 """Simple interface for retrieving comics"""
  10 import time, urllib, urllib2, random, yaml, re, BeautifulSoup, \
  11     copy, urlparse, os.path, NameEnc, sys, mimetypes, threading, traceback
  12 from signal import SIG_IGN, SIGINT, signal
  13 from Comic import DB, Conf, ComicLoader, Magic, InitDB
  14 from htmlentitydefs import name2codepoint
  15 from pkg_resources import resource_stream
  16 HaltLock = threading.Lock()
  17
  18 REType = type(re.compile(''))
  19
  20 Defs = {}
  21 Classes = {}
  22
  23 def checkHalt(func):
  24     def proxy(*args, **kw):
  25         if HaltLock.locked():
  26             raise KeyboardInterrupt
  27         return func(*args, **kw)
  28     return proxy
  29
  30 def Merge(infile, theclass, thedict):
  31     try:
  32         if isinstance(infile,basestring):
  33             infile = open(infile, 'rb')
  34         newdefs = yaml.load(infile, Loader=ComicLoader)
  35         newdefs = [ (c[0], theclass(name=c[0],**c[1])) for c in \
  36             newdefs.iteritems() ]
  37         thedict.update(newdefs)
  38     except IOError: pass
  39
  40 def FetchURL(url, referer=None, outfile=None):
  41     '''
  42         Fetches the requested url, with referer if desired.  If outfile is
  43         specified, returns url info, otherwise returns (data, url info).
  44         Exceptions other than KeyboardInterrupt will be ignored silently until
  45         the specified number of attempts have been made to retrieve the url.
  46         Fetch will sleep for the specified delay before attempt, with the delay
  47         increasing exponentially.
  48     '''
  49     print url
  50     attempts = Conf.dl_attempts
  51     delay = Conf.dl_delay
  52     while attempts:
  53         attempts -= 1
  54         time.sleep(delay * (1 + random.random() * Conf.dl_delay_rand))
  55         delay *= Conf.dl_delay_mul
  56         try:
  57             if isinstance(outfile, basestring):
  58                 outfile = open(outfile, 'wb')
  59             if outfile:
  60                 b = 0
  61             else:
  62                 b= ''
  63             req = urllib2.Request(url)
  64             if referer:
  65                 req.add_header('Referer',referer)
  66             handle = urllib2.urlopen(req)
  67             while True:
  68                 if HaltLock.locked():
  69                     raise KeyboardInterrupt
  70                 dat = handle.read(4096)
  71                 if len(dat):
  72                     if outfile:
  73                         outfile.write(dat)
  74                         b += len(dat)
  75                     else:
  76                         b += dat
  77                 else:
  78                     if outfile:
  79                         outfile.close()
  80                     break;
  81             i = handle.info()
  82             if i.has_key('Content-Length'):
  83                 if outfile:
  84                     n = b
  85                 else:
  86                     n = len(b)
  87                 if n != int(i['Content-Length']):
  88                     raise EOFError('Received incomplete file from server')
  89             if outfile:
  90                 return i
  91             else:
  92                 return (b, i)
  93         except KeyboardInterrupt:
  94             raise
  95         except:
  96             if not attempts: raise
  97
  98 _EntityRE = re.compile('(?:&((?:#(\d+)|([A-Za-z]+));)|(["\'&<>]))')
  99
 100 _MSEntities = {
 101     130: 8218,
 102     131: 402,
 103     132: 8222,
 104     133: 8230,
 105     134: 8224,
 106     135: 8225,
 107     136: 710,
 108     137: 8240,
 109     138: 352,
 110     139: 8249,
 111     140: 338,
 112     145: 8216,
 113     146: 8217,
 114     147: 8220,
 115     148: 8221,
 116     149: 8226,
 117     150: 8211,
 118     151: 8212,
 119     152: 732,
 120     153: 8482,
 121     154: 353,
 122     155: 353,
 123     156: 339,
 124     159: 376,
 125 }
 126
 127 _XMLEntities = {
 128     'quot': '&#34;',
 129     'amp': '&amp;',
 130     'apos': '&#39;',
 131     'lt': '&lt;',
 132     'gt': '&gt;',
 133     34: '&#34;',
 134     38: '&amp;',
 135     39: '&#39;',
 136     60: '&lt;',
 137     62: '&gt;',
 138     '"': '&#34;',
 139     '&': '&amp;',
 140     "'": '&#39;',
 141     '<': '&lt;',
 142     '>': '&gt;',
 143 }
 144
 145 def ParseEntities(input,OutputXML=True):
 146     output = []
 147     last = 0
 148     for m in re.finditer(_EntityRE, input):
 149         if m.start() > last:
 150             output.append(input[last:m.start()])
 151         last = m.end()
 152         ch = ''
 153         if m.group(2):
 154             ch = int(m.group(2))
 155             if ch in _XMLEntities and OutputXML:
 156                 output.append(_XMLEntities[ch])
 157                 continue
 158             elif ch in _MSEntities:
 159                 ch = _MSEntities[ch]
 160             try:
 161                 output.append(unichr(ch))
 162             except:
 163                 continue
 164         elif m.group(3):
 165             if m.group(3) in _XMLEntities and OutputXML:
 166                 output.append(_XMLEntities[ch])
 167                 continue
 168             if m.group(3) in name2codepoint:
 169                 ch = name2codepoint[m.group(3)]
 170             if ch:
 171                 try:
 172                     output.append(unichr(ch))
 173                 except:
 174                     continue
 175             else:
 176                 if OutputXML:
 177                     output.append('&amp;')
 178                     if m.group(1):
 179                         output.append(m.group(1))
 180                 else:
 181                     output.append(m.group(0))
 182         elif m.group(4):
 183             if OutputXML:
 184                 output.append(_XMLEntities[m.group(4)])
 185             else:
 186                 output.append(m.group(4))
 187     output.append(input[last:])
 188     return ''.join(output)
 189
 190 _DefaultFileRE = re.compile('(?i)comics?/')
 191 _DefaultAnchorURLMap = ('attrs', 'href')
 192 _DefaultImageURLMap = ('attrs', 'src')
 193 _DefaultPrevious = [
 194     {
 195         'name': 'a',
 196         'contents': re.compile('(?i)previous'),
 197         'index': 0,
 198         'map': {
 199             'url': _DefaultAnchorURLMap
 200         }
 201     },
 202     {
 203         'name': 'a',
 204         'contents': re.compile('(?i)prev'),
 205         'index': 0,
 206         'map': {
 207             'url': _DefaultAnchorURLMap
 208         }
 209     },
 210     {
 211         'name': 'a',
 212         'contents': re.compile('(?i)back'),
 213         'index': 0,
 214         'map': {
 215             'url': _DefaultAnchorURLMap
 216         }
 217     },
 218 ]
 219 _DefaultImage = {
 220     'name': 'img',
 221     'attrs': {
 222         'src': re.compile('(?i)(?:^|/)comics?/')
 223     },
 224     'map': {
 225         'url': _DefaultImageURLMap
 226     }
 227 }
 228
 229 def TextFullMatch(item, text):
 230     if isinstance(item, basestring):
 231         return item == text
 232     elif isinstance(item, REType):
 233         return not item.search(text) is None
 234     else:
 235         return text in item
 236
 237 def TextSubMatch(item, text):
 238     if isinstance(item, basestring): return item in text
 239     if isinstance(item, (list, tuple)):
 240         for i in item:
 241             if i in text: return True
 242         return False
 243     else:
 244         return item.search(text)
 245
 246 def ItemMatch(item, tag):
 247     if 'name' in item and not TextFullMatch(item['name'], tag.name):
 248         return False
 249     if 'text' in item and not TextSubMatch(item['text'], tag):
 250         return False
 251     if 'contents' in item and not TextSubMatch(item['contents'],
 252         tag.renderContents(encoding=None)):
 253         return False
 254     if 'attrs' in item and item['attrs']:
 255         attrs = dict(tag.attrs)
 256         for key, value in item['attrs'].iteritems():
 257             if value is False:
 258                 if key in attrs:
 259                     return False
 260                 else:
 261                     continue
 262             if not key in attrs:
 263                 return False
 264             elif value is True:
 265                 continue
 266             elif not TextFullMatch(value, attrs[key]):
 267                 return False
 268     for rel, attr in (('next', 'nextSibling'), ('prev', 'previousSibling')):
 269         if rel in item and item[rel]:
 270             while True:
 271                 next = getattr(tag,attr)
 272                 if not next: return False
 273                 if isinstance(next, BeautifulSoup.Tag): break
 274             if not ItemMatch(item[rel],next): return False
 275     if item.has_key('parent') and item['parent']:
 276         if not tag.parent:
 277             return False
 278         if not ItemMatch(item['parent'],tag.parent): return False
 279     return True
 280
 281 def DeepMerge(d1, d2):
 282     for k in set(d1.keys() + d2.keys()):
 283         if k in d1:
 284             if k in d2:
 285                 if isinstance(d2[k], dict):
 286                     d1[k] = DeepMerge(d1[k],d2[k])
 287         else:
 288             if isinstance(d2[k], dict):
 289                 d1[k] = DeepMerge(dict(), d2[k])
 290             else:
 291                 d1[k] = d2[k]
 292
 293 class ETuple(tuple): pass
 294
 295 class ComicClass:
 296     def __init__(self,**kw):
 297         if not 'name' in kw:
 298             raise TypeError("'name' argument is required")
 299         self.name = kw['name']
 300         for n in ('archive', 'current', 'previous', 'image'):
 301             if n in kw:
 302                 setattr(self, n, kw[n])
 303             else:
 304                 setattr(self, n, {})
 305
 306 class ComicDef:
 307     def __init__(self,**kw):
 308         DB.SetThreadConnection()
 309         if not 'name' in kw:
 310             raise TypeError("'name' argument is required")
 311         self.name = kw['name']
 312         if 'class' in kw:
 313             if kw['class'] in Classes:
 314                 self.parentclass = Classes[kw['class']]
 315             else:
 316                 raise TypeError('comic inherits from undefined class "%s"' % kw['class'])
 317         else:
 318             self.parentclass=None
 319         for n in ('url_start', 'url_current', 'url_archive', 'url_stop'):
 320             setattr(self, n, kw.get(n,''))
 321         for n in ('archive', 'current', 'previous', 'image'):
 322             if self.parentclass and getattr(self.parentclass, n):
 323                 classattr = getattr(self.parentclass, n)
 324                 myattr = []
 325                 if n in kw:
 326                     defattr = kw[n]
 327                     if isinstance(defattr,dict):
 328                         defattr = (defattr,)
 329                     for n in defattr:
 330                         myattr.append(DeepMerge(n, classattr))
 331                 else:
 332                     myattr = (classattr),
 333             else:
 334                 if n in kw:
 335                     if isinstance(kw[n], dict):
 336                         myattr = (kw[n],)
 337                     else:
 338                         myattr = kw[n]
 339                 else:
 340                     myattr = ()
 341             setattr(self,n, ETuple(myattr))
 342         if not(self.previous or self.archive):
 343             self.previous = ETuple(_DefaultPrevious)
 344         if not self.image:
 345             self.image = ETuple((_DefaultImage,))
 346         for i in self.previous + self.current + self.archive:
 347             if 'name' not in i: i['name'] = 'a'
 348             if 'map' not in i: i['map'] = {'url': _DefaultAnchorURLMap}
 349             if 'url' not in i['map']: i['map']['url'] = _DefaultAnchorURLMap
 350         for i in self.image:
 351             if 'name' not in i: i['name'] = 'img'
 352             if 'map' not in i: i['map'] = {'url': _DefaultImageURLMap}
 353             if 'url' not in i['map']: i['map']['url'] = _DefaultImageURLMap
 354         for match in ('image', 'current', 'archive', 'previous'):
 355             getattr(self, match).tags = self.GetTags(getattr(self, match))
 356         if self.previous and self.image:
 357             if self.previous.tags and self.image.tags:
 358                 tags = self.previous.tags | self.image.tags
 359             else:
 360                 tags = set()
 361             self.previous.tags = tags
 362             self.image.tags = tags
 363         self.WorkingURL = ''
 364         if 'massage' in kw:
 365             self.Massage = copy.copy(BeautifulSoup.BeautifulSoup.MARKUP_MASSAGE)
 366             self.Massage.extend([ (x, lambda m: m.expand(y)) for x,y in kw['massage'] ])
 367         else:
 368             self.Massage = BeautifulSoup.BeautifulSoup.MARKUP_MASSAGE
 369         try:
 370             self.DBComic = DB.Comic.byName(self.name)
 371         except:
 372             self.DBComic = None
 373         self.CurrentTitle = ''
 374         self.DirName = NameEnc.Encode(self.name)
 375         self.StoreDir = os.path.join(Conf.comic_store, self.DirName)
 376
 377     @checkHalt
 378     def Load(self, url, tags=()):
 379         (dat, inf) = FetchURL(url, referer=self.WorkingURL)
 380         self.WorkingURL = url
 381         self.HTMLText = dat
 382         kw = {}
 383         if self.Massage:
 384             kw['markupMassage'] = self.Massage
 385         if inf.has_key('Content-Type'):
 386             m = re.search("charset=(.+?)(?:[; ]|$)", inf['Content-Type'])
 387             if m:
 388                 kw['fromEncoding'] = m.group(1)
 389         self.HTMLTree = BeautifulSoup.BeautifulSoup(dat, **kw)
 390
 391     @checkHalt
 392     def FetchFile(self, url, outfile):
 393         return FetchURL(url, referer=self.WorkingURL, outfile=outfile)
 394
 395     def CreateDB(self, clear=0):
 396         if not self.DBComic:
 397             self.DBComic = DB.DoInTransaction(DB.Comic, name=self.name)
 398
 399     def MapItem(self, tag, imap):
 400         ret = {}
 401         for k, vm in imap.iteritems():
 402             v = u''
 403             mytag = tag
 404             while vm and vm[0] in ('nextSibling','previousSibling','parent','next','previous','contentsindex','childbyname'):
 405                 if vm[0] == 'contentsindex':
 406                     mytag = mytag.contents[vm[1]]
 407                     vm = vm[2:]
 408                 elif vm[0] == 'childbyname':
 409                     mytag = mytag.find(vm[1])
 410                     vm = vm[2:]
 411                 else:
 412                     mytag = getattr(mytag,vm[0])
 413                     vm = vm[1:]
 414             if vm[0] == 'attrs':
 415                 if mytag.has_key(vm[1]):
 416                     v = ParseEntities(mytag[vm[1]])
 417                     vm = vm[2:]
 418                 else:
 419                     vm = ()
 420                     v = ''
 421             elif vm[0] == 'text':
 422                 v = unicode(mytag)
 423                 vm = vm[1:]
 424             elif vm[0] == 'contents':
 425                 v = mytag.renderContents(encoding=None)
 426                 vm = vm[1:]
 427             while vm and v:
 428                 if vm[0] == 'url':
 429                     v = re.subn('#.*$', '', v)[0]
 430                     if v:
 431                         v = urlparse.urljoin(self.WorkingURL, v)
 432                         vm = vm[1:]
 433                 elif vm[0] == 'urlunquote':
 434                     v = urllib.unquote(v)
 435                     vm = vm[1:]
 436                 elif isinstance(vm[0], REType):
 437                     if vm[1] == 'match':
 438                         m = vm[0].search(v)
 439                         if m:
 440                             v = m.group(0)
 441                             vm = vm[2:]
 442                         else:
 443                             v = ''
 444                             vm = ()
 445                     elif vm[1] == 'expand':
 446                         m = vm[0].search(v)
 447                         if m:
 448                             v = m.expand(vm[2])
 449                         else:
 450                             v = ''
 451                             vm = vm[3:]
 452                     elif vm[1] == 'replace':
 453                         v = vm[0].sub(vm[2],v)
 454                         vm = vm[3:]
 455                 else:
 456                     v = ''
 457                     vm = ()
 458             if v:
 459                 v = ' '.join(v.strip().split())
 460             if k == 'url':
 461                 v = re.subn('#.*$', '', v)[0]
 462                 if v:
 463                     v = urlparse.urljoin(self.WorkingURL, v)
 464             if v:
 465                 ret[k] = v
 466         return ret
 467
 468     def GetTags(self, item):
 469         if isinstance(item, (tuple, list)):
 470             tags = []
 471             for i in item:
 472                 t = self.GetTags(i)
 473                 if not t:
 474                     return ()
 475                 else:
 476                     tags.extend(t)
 477             return set(tags)
 478         if 're' in item or not 'name' in item:
 479             return ()
 480         tags = [item['name']]
 481         for rel in ('prev', 'next', 'parent'):
 482             if rel in item and item[rel]:
 483                 t = self.GetTags(item[rel])
 484                 if not t: return ()
 485                 else: tags.extend(t)
 486         return set(tags)
 487
 488     @checkHalt
 489     def FindItems(self, item):
 490         if isinstance(item, (tuple, list)):
 491             for i in item:
 492                 ret = self.FindItems(i)
 493                 if ret: return ret
 494             return []
 495         elif 're' in item:
 496             ret = re.findall(item['re'], unicode(self.HTMLData))
 497         else:
 498             ret = self.HTMLTree.findAll(lambda x: ItemMatch(item, x))
 499         ind = None
 500         if 'index' in item:
 501             if item['index'] == 'first':
 502                 ind = 0
 503             elif item['index'] == 'last':
 504                 ind = -1
 505             elif not item['index'] is None:
 506                 ind = int(item['index'])
 507         elif item.has_key('reverse'):
 508             ret.reverse()
 509         if not ind is None:
 510             try:
 511                 ret = [ret[ind]]
 512             except IndexError:
 513                 return []
 514         ret = [ self.MapItem(i, item['map']) for i in ret ]
 515         return ret
 516
 517     def FindItem(self, item):
 518         for i in item:
 519             ret = self.FindItems(i)
 520             if ret:
 521                 return ret[0]
 522         return {}
 523
 524     def FindImages(self):
 525         ret = self.FindItems(self.image)
 526         for img in ret:
 527             if not 'file' in img:
 528                 l = _DefaultFileRE.split(img['url'])
 529                 if len(l) == 2:
 530                     img['file'] = l[1].replace('/','_')
 531                 else:
 532                     img['file'] = img['url'].rsplit('/',1)[-1]
 533         return tuple(ret)
 534
 535     @checkHalt
 536     def FetchComic(self, mode='backlog', exists=''):
 537         item = dict(
 538             url=self.WorkingURL,
 539             images=self.FindImages(),
 540         )
 541         if self.CurrentTitle:
 542             item['title'] = self.CurrentTitle
 543         try:
 544             for image in item['images']:
 545                 sel = DB.Image.selectBy(
 546                     comicID=self.DBComic.id,
 547                     file=image['file']
 548                 )
 549                 if sel.count() == 1:
 550                     prevdb = sel[0]
 551                     prev = dict([ (x, getattr(prevdb,x)) for x in
 552                         ['title', 'extra', 'url', 'file', 'mime']])
 553                 else:
 554                     prevdb = None
 555                     prev = {}
 556                 if prevdb:
 557                     if mode == 'backlog':
 558                         def fun():
 559                             prevdb.issue.url = self.WorkingURL
 560                         DB.DoInTransaction(fun)
 561                         return None
 562                 filename = os.path.join(self.StoreDir, image['file'])
 563                 m = None
 564                 if not os.path.exists(filename) or exists == 'refetch':
 565                     headers = self.FetchFile(image['url'], filename)
 566                     if headers.has_key('Content-Type'):
 567                         m = headers['Content-Type']
 568                 if not m:
 569                     m = mimetypes.guess_type(filename)[0]
 570                 if not m:
 571                     m = Magic.Path(filename)
 572                 if not m:
 573                     m = ''
 574                 image['mime'] = m
 575         except:
 576             for image in item['images']:
 577                 filename = os.path.join(self.StoreDir, image['file'])
 578                 if os.path.exists(filename):
 579                     try: os.remove(filename)
 580                     except: pass
 581             raise
 582         return item
 583
 584     @checkHalt
 585     def GetPrev(self):
 586         prev = self.FindItem(self.previous)
 587         url = prev.get('url','')
 588         if not url:
 589             return False
 590         if url in [ getattr(self,x) for x in \
 591             ('url_current', 'url_start', 'url_archive', \
 592             'WorkingURL') ]:
 593             return False
 594         if DB.Issue.selectBy(
 595             comicID=self.DBComic.id,
 596             url=url
 597         ).count():
 598             return False
 599         if url == self.url_stop:
 600             return False
 601         self.Load(url, self.image.tags)
 602         return True
 603
 604     def FetchComics(self, mode='backlog', exists=''):
 605         try:
 606             if not self.DBComic.issues.count():
 607                 mode = 'archive'
 608             if self.url_archive:
 609                 self.Load(self.url_archive, self.archive.tags)
 610                 archive_comics = self.FindItems(self.archive)
 611                 if mode == 'resume':
 612                     url = self.DBComic.issues.limit(1)[0].url
 613                 elif mode == 'backlog':
 614                     url = self.DBComic.issues.reversed().limit(1)[0].url
 615                 else:
 616                     url = ''
 617                 if url:
 618                     for n in range(len(archive_comics)):
 619                         if archive_comics[n]['url'] == url:
 620                             if mode == 'resume':
 621                                 archive_comics = archive_comics[n+1:]
 622                             else:
 623                                 archive_comics = archive_comics[:n]
 624                             break
 625             else:
 626                 if mode == 'resume':
 627                     self.Load(self.DBComic.issues.limit(1)[0].url, self.previous.tags)
 628                     if not self.GetPrev(): return
 629                 else:
 630                     if self.url_start:
 631                         self.Load(self.url_start,self.current.tags)
 632                         cur = self.FindItem(self.current)
 633                         url = cur.get('url','')
 634                         if url and url not in [ getattr(self,x) for x in \
 635                             ('url_current', 'url_start', 'url_archive', \
 636                             'WorkingURL') ]:
 637                             self.Load(url, self.previous.tags)
 638                             self.CurrentTitle = cur.get('title','')
 639                         else: return
 640                     else:
 641                         self.Load(self.url_current,self.previous.tags)
 642             if not os.path.isdir(self.StoreDir):
 643                 os.makedirs(self.StoreDir)
 644             if mode == 'archive':
 645                 def fun():
 646                     self.DBComic.UpdateTimeStamp()
 647                     DB.Image.deleteBy(comicID=self.DBComic.id)
 648                     DB.Issue.deleteBy(comicID=self.DBComic.id)
 649                 DB.DoInTransaction(fun)
 650         except:
 651                 e = sys.exc_info()
 652                 return ("WARNING", "exception raised (%s) while initializing, " \
 653                     "suggest repeating operation %s later or after resolving " \
 654                     "the problem" % (e[1].__class__.__name__, mode), e)
 655         try:
 656             if mode == 'backlog':
 657                 new_comics = []
 658             if self.url_archive:
 659                 for comic in archive_comics:
 660                     self.Load(comic['url'], self.image.tags)
 661                     self.CurrentTitle = comic.get('title','')
 662                     iss = self.FetchComic(mode, exists)
 663                     if not iss:
 664                         break
 665                     if mode == 'backlog':
 666                         new_comics.append(iss)
 667                     else:
 668                         DB.DoInTransaction(self.DBFromItem, iss, 'old')
 669             else:
 670                 while True:
 671                     iss = self.FetchComic(mode, exists)
 672                     if not iss:
 673                         break
 674                     if mode == 'backlog':
 675                         new_comics.append(iss)
 676                     else:
 677                         DB.DoInTransaction(self.DBFromItem, iss, 'old')
 678                     if not self.GetPrev(): break
 679         except:
 680             e = sys.exc_info()
 681             if isinstance(e[1], KeyboardInterrupt):
 682                 msg = "keyboard interrupt received"
 683             else:
 684                 msg = "exception raised (%s)" % e[1].__class__.__name__
 685             if mode == 'backlog':
 686                 return ("WARNING", "%s, suggest repeating operation backlog" \
 687                     % msg, e)
 688             else:
 689                 return ("WARNING", "%s, suggest repeating operation resume" \
 690                     % msg, e)
 691         if mode == 'backlog':
 692             try:
 693                 try:
 694                     oldhandler = signal(SIGINT, SIG_IGN)
 695                 except: pass
 696                 for iss in reversed(new_comics):
 697                     DB.DoInTransaction(self.DBFromItem, iss)
 698
 699             except:
 700                 e = sys.exc_info()
 701                 return("ERROR", "exception raised (%s) during DB update, " \
 702                     "database may be inconsistent" % e[1].__class__.__name__, \
 703                     e)
 704         return None
 705
 706     def DBFromItem(self, item, ext='new'):
 707         if self.DBComic.issues.count():
 708             if ext == 'new':
 709                 serial = self.DBComic.issues.reversed().limit(1)[0].serial + 1
 710             else:
 711                 serial = self.DBComic.issues.limit(1)[0].serial - 1
 712         else:
 713             serial = 0
 714         if len(item['images']) == 1 and 'title' in item['images'][0] and \
 715             (item['images'][0]['title'] == item.get('title') \
 716             or not item.get('title')):
 717             item['title'] = item['images'][0]['title']
 718             item['images'][0]['title'] = ''
 719         iss = DB.Issue(
 720             comic=self.DBComic,
 721             title=item.get('title',''),
 722             serial=serial,
 723             url=item['url'],
 724         )
 725         imgserial = -1
 726         for i in item['images']:
 727             imgserial += 1
 728             img = DB.Image(
 729                 comic=self.DBComic,
 730                 issue=iss,
 731                 title=i.get('title',''),
 732                 extra=i.get('extra',''),
 733                 serial=imgserial,
 734                 file=i['file'],
 735                 mime=i['mime'],
 736                 url=i['url']
 737             )
 738
 739 def Init():
 740     classes_source = resource_stream('Comic', 'data/comics/classes.yml')
 741     definitions_source = resource_stream('Comic', 'data/comics/definitions.yml')
 742     Merge(classes_source, ComicClass, Classes)
 743     Merge(definitions_source, ComicDef, Defs)
 744
 745 usage = \
 746 """Usage: %s [[operation [comics]] | [[comic_options] [comics]]]
 747 Operations:
 748         -h, --help      Display this usage message
 749         -l, --list      Lists user's selected comics.
 750         -L, --list-all  Lists all available comics.
 751         -A, --add       Add comics to user's selected comics.
 752         -R, --remove    Remove comics from from user's selected comic.
 753
 754 If none of the above operations are specified, program will run in fetch
 755 backlog mode.
 756
 757 Comic options:
 758         -a, --archive   The following comics will be fetched in archive mode.
 759         -r, --resume    The following comics will be fetched in resume mode.
 760         -b, --backlog   The following comics will be fetched in backlog mode.
 761         -f, --refetch   Files existing in the comic archive will be re-fetched.
 762         -F, --no-refetch Files existing in the comic archive will not be
 763                         re-fetched.
 764
 765 Archive mode will fetch from the current strip back to the earliest available
 766 strip. If an archive exists for the comic, its comic strip list will be replaced
 767 with the list of new comics downloaded.
 768
 769 Resume mode will fetch from the oldest strip in the archive's comic strip list
 770 back to the earliest available strip. The new strips will be appended to the
 771 existing list.
 772
 773 Backlog mode will fetch from the current strip back to the newest strip already
 774 in the archive's comic strip list. The new strips will be prepended to the
 775 existing list upon success.
 776
 777 If no comics are specified, all comics selected by the user will be fetched in
 778 the specified mode."""
 779
 780 def FetchThreaded(comic, mode, exists):
 781     DB.SetThreadConnection()
 782     ret = comic.FetchComics(mode=mode, exists=exists)
 783     if not ret is None:
 784         comlock.acquire()
 785         errs.append((comic,)+ret)
 786         comlock.release()
 787     exitsignal.set()
 788
 789 def Run():
 790     global comlock, errs, exitsignal
 791     InitDB()
 792     Init()
 793     from Comic import Conf
 794     comlock = threading.Lock()
 795     exitsignal = threading.Event()
 796     mode = 'backlog'
 797     exists = ''
 798     errors = []
 799     comiclist = []
 800     err = False
 801     opp = 'fetch'
 802     AppName = os.path.basename(sys.argv[0])
 803     if '-h' in sys.argv or '--help' in sys.argv[1:]:
 804         print usage % sys.argv[0]
 805         sys.exit()
 806     if len(sys.argv) > 1:
 807         if sys.argv[1] in ('--list','-l','--list-all','-L'):
 808             if len(sys.argv) > 2:
 809                 sys.exit("--list and --list-all are not valid with other options. Run \"%s --help\" for usage."%(sys.argv[0],))
 810             if sys.argv[1] in ('--list','-l'):
 811                 comics = sorted(map(lambda x: x.name,
 812                     DB.Comic.select()), lambda x,y:
 813                     cmp(x.lower(), y.lower()))
 814                 if not comics:
 815                     sys.stderr.write("No comics in user comic list.\n")
 816                     sys.exit()
 817             else:
 818                 comics = Defs.keys()
 819                 comics.sort()
 820             for c in comics:
 821                 print c
 822             sys.exit()
 823         elif sys.argv[1] in ('-A','--add','-R','--remove'):
 824             comiclist = []
 825             if sys.argv[1] in ('-A','--add'):
 826                 for c in sys.argv[2:]:
 827                     if not c in Defs:
 828                         sys.exit("The comic \"%s\" does not exist in the set of available comic definitions."%c)
 829                     elif not c in comiclist:
 830                         comiclist.append(c)
 831                 for c in comiclist:
 832                     Defs[c].CreateDB()
 833             else:
 834                 for c in sys.argv[2:]:
 835                     if c in Defs:
 836                         sys.exit("The comic \"%s\" does not exist in the set of available comic definitions."%c)
 837                     elif not c in comiclist:
 838                         comiclist.append(c)
 839                 for c in comiclist:
 840                     DB.Comic.deleteBy(name=c)
 841             sys.exit()
 842     fetchlist = []
 843     for arg in sys.argv[1:]:
 844         if arg in ('--resume','-r'):
 845             mode = 'resume'
 846         elif arg in ('--archive','-a'):
 847             mode = 'archive'
 848         elif arg in ('--backlog','-b'):
 849             mode = 'backlog'
 850         elif arg in ('--refetch','-f'):
 851             exists = 'refetch'
 852         elif arg in ('--no-refetch','-F'):
 853             exists = ''
 854         else:
 855             if arg in Defs:
 856                 if Defs[arg].DBComic:
 857                     fetchlist.append((Defs[arg],mode,exists))
 858                 else:
 859                     sys.exit("The comic \"%s\" is not selected for fetching. Try \"%s -A '%s'\" to add it."%(arg,AppName,arg))
 860             else:
 861                 sys.exit("The comic \"%s\". does not exist. Try \"%s -L\" to list available comics."%(arg,AppName))
 862     if not fetchlist:
 863         s = DB.Comic.select()
 864         if s.count():
 865             fetchlist = map(lambda x: (Defs[x.name],mode,exists), s)
 866         else:
 867             sys.exit("No comics specified and none in user's set of selected comics. Try \"%s -h\" to see more options."%AppName)
 868     errs = []
 869     if Conf.threads:
 870         try:
 871             for i in fetchlist:
 872                 while len(threading.enumerate()) > Conf.threads:
 873                     exitsignal.wait(1)
 874                 exitsignal.clear()
 875                 t = threading.Thread(target=FetchThreaded, name=i[0].name, args=i)
 876                 t.start()
 877         except KeyboardInterrupt:
 878             HaltLock.acquire(False)
 879         while len(threading.enumerate()) > 1:
 880             try:
 881                 while len(threading.enumerate()) > 1:
 882                     exitsignal.wait(1)
 883                     exitsignal.clear()
 884             except KeyboardInterrupt:
 885                 HaltLock.acquire(False)
 886     else:
 887         for c in fetchlist:
 888             ret = c[0].FetchComics(c[1],c[2])
 889             if not ret is None:
 890                 errs.append((c[0],) + ret)
 891     for e in errs:
 892         print "%s: (%s) %s" % (e[1],e[0].name,e[2])
 893         traceback.print_exception(e[3][0],e[3][1],e[3][2])