# mygpo/maintenance/migrate.py
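"""One-off migration of mygpo's CouchDB documents into the Django ORM.

Walks a CouchDB changes feed (via couchdbkit) and writes Podcast, Episode
and PodcastGroup documents into the relational models in mygpo.podcasts;
all other document types are skipped.
"""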

from __future__ import unicode_literals

import json
import logging
from datetime import datetime

from django.contrib.contenttypes.models import ContentType
from django.db import transaction, IntegrityError, DataError
from django.utils.text import slugify

from mygpo.core.models import Podcast as P, Episode as E, PodcastGroup as G
from mygpo.db.couchdb.podcast_state import podcast_subscriber_count
from mygpo.podcasts.models import (Podcast, Episode, URL, Slug, Tag,
                                   MergedUUID, PodcastGroup)

logger = logging.getLogger(__name__)


def to_maxlength(cls, field, val):
    """ Cut val to the maximum length of cls's field """
    max_length = cls._meta.get_field(field).max_length
    orig_length = len(val)
    if orig_length > max_length:
        val = val[:max_length]
        logger.warn('%s.%s length reduced from %d to %d',
                    cls.__name__, field, orig_length, max_length)

    return val
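
# Illustrative example (the max_length of 1000 is hypothetical): if
# Episode.title were a CharField(max_length=1000), then
#   to_maxlength(Episode, 'title', 'x' * 1200)
# would return the first 1000 characters and log
# "Episode.title length reduced from 1200 to 1000".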


def migrate_episode(e):

    try:
        podcast, created = Podcast.objects.get_or_create(id=e.podcast)
    except DataError:
        # some Episodes have an ID equal to the podcast's URL (not ID)
        logger.exception('Error while getting/creating podcast stub')
        return

    if created:
        logger.info('Created stub for podcast %s', e.podcast)

    e2, created = Episode.objects.update_or_create(id=e._id, defaults={
        'title': to_maxlength(Episode, 'title', e.title or ''),
        'subtitle': e.subtitle or '',
        'guid': to_maxlength(Episode, 'guid', e.guid) if e.guid is not None else None,
        'description': e.description or '',
        'content': e.content or '',
        'link': to_maxlength(Episode, 'link', e.link) if e.link is not None else None,
        'released': e.released,
        'author': to_maxlength(Episode, 'author', e.author) if e.author is not None else None,
        'duration': max(0, e.duration) if e.duration is not None else None,
        'filesize': max(0, e.filesize) if e.filesize is not None else None,
        'language': to_maxlength(Episode, 'language', e.language) if e.language is not None else None,
        'last_update': e.last_update,
        'outdated': e.outdated,
        'mimetypes': to_maxlength(Episode, 'mimetypes', ','.join(e.mimetypes)),
        'listeners': max(0, e.listeners) if e.listeners is not None else None,
        'content_types': ','.join(e.content_types),
        'flattr_url': to_maxlength(Episode, 'flattr_url', e.flattr_url) if e.flattr_url else None,
        'created': datetime.fromtimestamp(e.created_timestamp) if e.created_timestamp else datetime.utcnow(),
        'license': e.license,
        'podcast': podcast,
    })

    update_urls(e, e2)
    update_slugs(e, e2)
    update_ids(e, e2)


def migrate_podcast(p):
    logger.info('Migrating podcast %r', p)

    # group members carry their ID in 'id', standalone podcasts in '_id'
    if p.group_member_name:
        pid = p.id
    else:
        pid = p._id

    p2, created = Podcast.objects.update_or_create(id=pid, defaults={
        'title': p.title or '',
        'subtitle': p.subtitle or '',
        'description': p.description or '',
        'link': p.link,
        'language': to_maxlength(Podcast, 'language', p.language) if p.language is not None else None,
        'created': datetime.fromtimestamp(p.created_timestamp) if p.created_timestamp else datetime.utcnow(),
        'last_update': p.last_update,
        'license': p.license,
        'flattr_url': to_maxlength(Podcast, 'flattr_url', p.flattr_url) if p.flattr_url else None,
        'outdated': p.outdated,
        'author': to_maxlength(Podcast, 'author', p.author) if p.author is not None else None,
        'logo_url': p.logo_url,
        'common_episode_title': to_maxlength(Podcast, 'common_episode_title', p.common_episode_title or ''),
        'new_location': p.new_location,
        'latest_episode_timestamp': p.latest_episode_timestamp,
        'episode_count': p.episode_count or 0,
        'hub': p.hub,
        'content_types': ','.join(p.content_types),
        'restrictions': ','.join(p.restrictions),
        'twitter': getattr(p, 'twitter', None),
        'group_member_name': p.group_member_name,
        'update_interval': p.update_interval,
        'subscribers': podcast_subscriber_count(p),
    })

    update_urls(p, p2)
    update_slugs(p, p2)
    update_tags(p, p2)
    update_ids(p, p2)

    return p2
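
# A CouchDB PodcastGroup document embeds its member podcasts: each member is
# migrated individually and then linked to the newly created group row.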


def migrate_podcastgroup(g):
    logger.info('Migrating podcast group %r', g)

    g2, created = PodcastGroup.objects.update_or_create(id=g._id, defaults={
        'title': g.title,
    })

    for p in g.podcasts:
        p2 = migrate_podcast(p)
        p2.group = g2
        p2.save()

    update_slugs(g, g2)

    return g2


def update_urls(old, new):

    existing_urls = {u.url: u for u in new.urls.all()}
    logger.info('%d existing URLs', len(existing_urls))

    new_urls = old.urls
    logger.info('%d new URLs', len(new_urls))

    with transaction.atomic():
        # move existing URLs out of the way first, so that assigning the
        # target order values below cannot collide with them
        max_order = max([s.order for s in existing_urls.values()] + [len(new_urls)])
        logger.info('Renumbering URLs starting from %d', max_order)
        for n, url in enumerate(existing_urls.values(), max_order+1):
            url.order = n
            url.save()

    logger.info('%d existing URLs', len(existing_urls))
    for n, url in enumerate(new_urls):
        try:
            u = existing_urls.pop(url)
            u.order = n
            u.save()
        except KeyError:
            try:
                URL.objects.create(url=to_maxlength(URL, 'url', url),
                                   content_object=new,
                                   order=n,
                                   scope=new.scope,
                                   )
            except IntegrityError as ie:
                logger.warn('Could not create URL for %s: %s', new,
                            str(ie).decode('ascii', errors='replace'))

    with transaction.atomic():
        delete = [u.pk for u in existing_urls.values()]
        logger.info('Deleting %d URLs', len(delete))
        URL.objects.filter(id__in=delete).delete()


def update_slugs(old, new):
    new_slugs = filter(None, [old.slug] + old.merged_slugs +
                             [old.oldid] + old.merged_oldids)
    new_slugs = map(unicode, new_slugs)
    new_slugs = map(slugify, new_slugs)
    new_slugs = map(lambda s: to_maxlength(Slug, 'slug', s), new_slugs)
    new.set_slugs(new_slugs)
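
# Illustrative example (values are made up): a podcast with slug
# 'linux-outlaws', merged slug 'linuxoutlaws' and oldid 42 ends up with the
# slugs ['linux-outlaws', 'linuxoutlaws', '42'].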


@transaction.atomic
def update_tags(old, new):
    # TODO: delete?
    for tag in old.tags.get('feed', []):
        t, created = Tag.objects.get_or_create(
            tag=to_maxlength(Tag, 'tag', unicode(tag)),
            source=Tag.FEED,
            content_type=ContentType.objects.get_for_model(new),
            object_id=new.pk,
        )


@transaction.atomic
def update_ids(old, new):
    # TODO: delete?
    for mid in old.merged_ids:
        u, created = MergedUUID.objects.get_or_create(
            uuid=mid,
            content_type=ContentType.objects.get_for_model(new),
            object_id=new.pk,
        )


from couchdbkit import Database
from couchdbkit.changes import ChangesStream, fold, foreach

db = Database('http://127.0.0.1:6984/mygpo_core_copy')
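

# maps a CouchDB doc_type to its couchdbkit document class and migration
# function; a (None, None) entry means documents of that type are skipped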
MIGRATIONS = {
    'Podcast': (P, migrate_podcast),
    'Episode': (E, migrate_episode),
    'PodcastGroup': (G, migrate_podcastgroup),
    'PodcastList': (None, None),
    'PodcastSubscriberData': (None, None),
    'EmailMessage': (None, None),
    'ExamplePodcasts': (None, None),
    'CommandStatus': (None, None),
    'User': (None, None),
}


def migrate_change(c):
    logger.info('Migrate seq %s', c['seq'])
    doctype = c['doc']['doc_type']

    cls, migrate = MIGRATIONS[doctype]

    if cls is None:
        logger.warn("Skipping '%s'", doctype)
        return

    obj = cls.wrap(c['doc'])
    migrate(obj)


def migrate(since=1187918):
    with ChangesStream(db,
                       feed="continuous",
                       heartbeat=True,
                       include_docs=True,
                       since=since,
                       ) as stream:
        for change in stream:
            migrate_change(change)
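

# Example invocation (hypothetical), e.g. from a Django shell; the default
# `since` value presumably points just past the last already-migrated change:
#
#   from mygpo.maintenance import migrate as migration
#   migration.migrate(since=0)  # replay the changes feed from the start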