[Podcasts] make merge.py pep8 compliant
[mygpo.git] / mygpo / maintenance / merge.py
bloba4c59ae3f21b5ea0023e6e7fe1bb1d8a69d399b5
1 import collections
3 from django.db import transaction, IntegrityError
4 from django.contrib.contenttypes.models import ContentType
5 from django.db.models import get_models, Model
6 from django.contrib.contenttypes.generic import GenericForeignKey
8 from mygpo.podcasts.models import (MergedUUID, ScopedModel, OrderedModel, Slug,
9 Tag, URL, MergedUUID, Podcast, Episode)
10 from mygpo import utils
11 from mygpo.history.models import HistoryEntry
12 from mygpo.publisher.models import PublishedPodcast
13 from mygpo.subscriptions.models import Subscription, PodcastConfig
14 from mygpo.db.couchdb.episode_state import all_episode_states, \
15 update_episode_state_object, add_episode_actions, delete_episode_state, \
16 merge_episode_states
18 import logging
19 logger = logging.getLogger(__name__)
22 PG_UNIQUE_VIOLATION = 23505
class IncorrectMergeException(Exception):
    """ Raised when a requested merge cannot be carried out

    The mergers in this module raise it, for example, when an object would
    be merged into itself. """
class PodcastMerger(object):
    """ Merges a list of podcasts (and their related objects) into one """

    def __init__(self, podcasts, actions, groups):
        """ Sets up the merge of podcasts[1:] into podcasts[0]

        podcasts: list of podcasts; the first entry is the merge target
        actions: mapping of action name to counter, incremented while merging
        groups: iterable of (key, episodes) pairs; the episodes of each
                pair are merged into the first episode of that pair

        Raises IncorrectMergeException if the same podcast is contained
        at two different positions of the list. """

        for idx_a, first in enumerate(podcasts):
            for idx_b, second in enumerate(podcasts):
                is_duplicate = first == second and idx_a != idx_b
                if is_duplicate:
                    raise IncorrectMergeException(
                        "can't merge podcast %s into itself %s" %
                        (first.get_id(), second.get_id()))

        self.podcasts = podcasts
        self.actions = actions
        self.groups = groups

    def merge(self):
        """ Performs the merge and returns the merge target

        The target is removed from self.podcasts, so afterwards the list
        only contains the podcasts that were merged into it. """

        logger.info('Start merging of podcasts: %r', self.podcasts)

        target = self.podcasts.pop(0)
        logger.info('Merge target: %r', target)

        self.merge_episodes()

        for source in self.podcasts:
            logger.info('Merging %r into target', source)
            self.reassign_episodes(target, source)
            logger.info('Deleting %r', source)

        merge_model_objects(target, self.podcasts)

        return target

    def merge_episodes(self):
        """ Merges the episodes of each group into the group's first one """

        for _key, episodes in self.groups:
            if episodes:
                # the first episode is taken out of the list and becomes
                # the target for the remaining ones
                primary = episodes.pop(0)
                merge_model_objects(primary, episodes)

    def reassign_episodes(self, podcast1, podcast2):
        """ Moves the episode states of podcast2's episodes to podcast1 """

        logger.info('Re-assigning episodes of %r into %r', podcast2, podcast1)

        # re-assign episodes to new podcast
        # if necessary, they will be merged later anyway
        for episode in podcast2.episode_set.all():
            self.actions['reassign-episode'] += 1

            for episode_state in all_episode_states(episode):
                self.actions['reassign-episode-state'] += 1

                update_episode_state_object(episode_state, podcast1.get_id())
class EpisodeMerger(object):
    """ Merges the episode states of one episode into another episode """

    def __init__(self, episode1, episode2, actions):
        """ Prepares to merge episode2 into episode1

        actions is a mapping of action name to counter that is incremented
        while merging.

        Raises IncorrectMergeException if both episodes are equal. """

        if episode1 == episode2:
            raise IncorrectMergeException("can't merge episode into itself")

        self.episode1 = episode1
        self.episode2 = episode2
        self.actions = actions

    def merge(self):
        """ Merges the states of episode2 into those of episode1 """
        logger.info('Merging episode %r into %r', self.episode2, self.episode1)
        self.merge_states(self.episode1, self.episode2)
        logger.info('Deleting %r', self.episode2)

    def merge_states(self, episode, episode2):
        """ Merges the per-user episode states of both episodes

        The states are always loaded for self.episode1 and self.episode2;
        the arguments are only used for the log message. """

        def by_user(episode_state):
            return episode_state.user

        target_states = sorted(all_episode_states(self.episode1), key=by_user)
        source_states = sorted(all_episode_states(self.episode2), key=by_user)

        logger.info('Merging %d episode states of %r into %r',
                    len(source_states), episode2, episode)

        pairs = utils.iterate_together([target_states, source_states],
                                       by_user)
        for state, state2 in pairs:
            # nothing to do for identical states, or if there is no state
            # that would have to be moved
            if state == state2 or state2 is None:
                continue

            if state is None:
                # the target has no state for this user: move the state over
                self.actions['move-episode-state'] += 1
                update_episode_state_object(
                    state2,
                    self.episode1.podcast.get_id(),
                    self.episode1.get_id()
                )

            else:
                # both episodes have a state for this user: merge them
                EpisodeStateMerger(state, state2, self.actions).merge()
class EpisodeStateMerger(object):
    """ Merges one episode state (state2) into another one (state) """

    def __init__(self, state, state2, actions):
        """ Prepares to merge state2 into state

        Raises IncorrectMergeException if both states have the same _id
        or if they belong to different users. """

        same_document = state._id == state2._id
        if same_document:
            raise IncorrectMergeException(
                "can't merge episode state into itself")

        same_user = state.user == state2.user
        if not same_user:
            raise IncorrectMergeException(
                "states don't belong to the same user")

        self.state = state
        self.state2 = state2
        self.actions = actions

    def merge(self):
        """ Merges state2 into state, deletes state2 and counts the action """
        merge_episode_states(self.state, self.state2)
        delete_episode_state(self.state2)
        self.actions['merge-episode-state'] += 1
def reassign_urls(obj1, obj2):
    """ Reassigns all URLs of obj2 to obj1

    The moved URLs are appended after the existing URLs of obj1: they are
    numbered consecutively, starting one above the highest order currently
    used by obj1 (or at 1 if obj1 has no URLs). They also receive the scope
    of obj1. A URL that can not be saved because of an IntegrityError is
    logged and deleted.

    NOTE(review): if this runs inside an open database transaction, a failed
    save() may leave that transaction unusable for the following delete();
    merge_model_objects() wraps its save() in a savepoint for that reason --
    confirm whether the same is needed here. """

    max_order = max([0] + [u.order for u in obj1.urls.all()])

    for n, url in enumerate(obj2.urls.all(), max_order + 1):
        url.content_object = obj1
        url.order = n
        url.scope = obj1.scope
        try:
            url.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of Logger.warning()
            logger.warning('Moving URL failed: %s. Deleting.', str(ie))
            url.delete()
def reassign_merged_uuids(obj1, obj2):
    """ Reassigns all IDs of obj2 to obj1

    The ID of obj2 itself is recorded as a new MergedUUID of obj1; the
    MergedUUIDs that already point to obj2 are re-pointed to obj1. """

    MergedUUID.objects.create(uuid=obj2.id, content_object=obj1)

    for merged_uuid in obj2.merged_uuids.all():
        merged_uuid.content_object = obj1
        merged_uuid.save()
def reassign_slugs(obj1, obj2):
    """ Reassigns all Slugs of obj2 to obj1

    The moved slugs are appended after the existing slugs of obj1: they are
    numbered consecutively, starting one above the highest order currently
    used by obj1 (or at 1 if obj1 has no slugs). They also receive the scope
    of obj1. A slug that can not be saved because of an IntegrityError is
    logged and deleted.

    NOTE(review): if this runs inside an open database transaction, a failed
    save() may leave that transaction unusable for the following delete();
    merge_model_objects() wraps its save() in a savepoint for that reason --
    confirm whether the same is needed here. """

    max_order = max([0] + [s.order for s in obj1.slugs.all()])

    for n, slug in enumerate(obj2.slugs.all(), max_order + 1):
        slug.content_object = obj1
        slug.order = n
        slug.scope = obj1.scope
        try:
            slug.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of Logger.warning()
            logger.warning('Moving Slug failed: %s. Deleting', str(ie))
            slug.delete()
# based on https://djangosnippets.org/snippets/2283/
@transaction.commit_on_success
def merge_model_objects(primary_object, alias_objects=None, keep_old=False):
    """
    Use this function to merge model objects (i.e. Users, Organizations, Polls,
    etc.) and migrate all of the related fields from the alias objects to the
    primary object.

    Usage:
    from django.contrib.auth.models import User
    primary_user = User.objects.get(email='good_email@example.com')
    duplicate_user = User.objects.get(email='good_email+duplicate@example.com')
    merge_model_objects(primary_user, duplicate_user)

    primary_object: the model instance that survives the merge
    alias_objects: a list of instances (or a single instance) of the same
                   class that are merged into primary_object; omitting it
                   (or passing None) merges nothing
    keep_old: if True, the alias objects are not deleted after their
              references have been migrated

    Returns primary_object (after saving it).

    Raises TypeError if primary_object is not a Django model instance or if
    an alias object is not an instance of primary_object's class.
    """
    # None instead of a mutable default argument; a single object is
    # accepted as a shortcut for a one-element list
    if alias_objects is None:
        alias_objects = []
    elif not isinstance(alias_objects, list):
        alias_objects = [alias_objects]

    # check that all aliases are the same class as primary one and that
    # they are subclass of model
    primary_class = primary_object.__class__

    if not issubclass(primary_class, Model):
        raise TypeError('Only django.db.models.Model subclasses can be merged')

    for alias_object in alias_objects:
        if not isinstance(alias_object, primary_class):
            raise TypeError('Only models of same class can be merged')

    # Get a list of all GenericForeignKeys in all models
    # TODO: this is a bit of a hack, since the generics framework should
    # provide a similar method to the ForeignKey field for accessing the
    # generic related fields.
    generic_fields = []
    for model in get_models():
        # values() instead of iteritems(): the attribute names are not
        # needed, and values() exists on Python 2 and 3
        generic_fields.extend(
            attr for attr in model.__dict__.values()
            if isinstance(attr, GenericForeignKey))

    # local fields of the primary object that are still empty and can be
    # filled from the alias objects
    blank_local_fields = set(
        [field.attname for field
         in primary_object._meta.local_fields
         if getattr(primary_object, field.attname) in [None, '']])

    # Loop through all alias objects and migrate their data to
    # the primary object.
    for alias_object in alias_objects:
        # Migrate all foreign key references from alias object to
        # primary object.
        for related_object in alias_object._meta.get_all_related_objects():
            # The variable name on the alias_object model.
            alias_varname = related_object.get_accessor_name()
            # The variable name on the related model.
            obj_varname = related_object.field.name
            related_objects = getattr(alias_object, alias_varname)
            for obj in related_objects.all():
                setattr(obj, obj_varname, primary_object)
                reassigned(obj, primary_object)
                obj.save()

        # Migrate all many to many references from alias object to
        # primary object.
        related = alias_object._meta.get_all_related_many_to_many_objects()
        for related_many_object in related:
            alias_varname = related_many_object.get_accessor_name()
            obj_varname = related_many_object.field.name

            if alias_varname is not None:
                # standard case
                related_many_objects = getattr(alias_object,
                                               alias_varname).all()
            else:
                # special case, symmetrical relation, no reverse accessor
                related_many_objects = getattr(alias_object,
                                               obj_varname).all()
            for obj in related_many_objects.all():
                getattr(obj, obj_varname).remove(alias_object)
                reassigned(obj, primary_object)
                getattr(obj, obj_varname).add(primary_object)

        # Migrate all generic foreign key references from alias
        # object to primary object.
        for field in generic_fields:
            filter_kwargs = {
                field.fk_field: alias_object._get_pk_val(),
                field.ct_field: field.get_content_type(alias_object),
            }
            related = field.model.objects.filter(**filter_kwargs)
            for generic_related_object in related:
                setattr(generic_related_object, field.name, primary_object)
                reassigned(generic_related_object, primary_object)
                try:
                    # execute save in a savepoint, so we can resume in the
                    # transaction
                    with transaction.atomic():
                        generic_related_object.save()
                except IntegrityError as ie:
                    # NOTE(review): psycopg2 documents pgcode as a string,
                    # while PG_UNIQUE_VIOLATION is defined as an int in this
                    # module -- this comparison may therefore never be true.
                    # Confirm before changing it: merge() raises for every
                    # type except URL.
                    if ie.__cause__.pgcode == PG_UNIQUE_VIOLATION:
                        merge(generic_related_object, primary_object)

        # Try to fill all missing values in primary object by
        # values of duplicates
        filled_up = set()
        for field_name in blank_local_fields:
            val = getattr(alias_object, field_name)
            if val not in [None, '']:
                setattr(primary_object, field_name, val)
                filled_up.add(field_name)
        blank_local_fields -= filled_up

        if not keep_old:
            before_delete(alias_object, primary_object)
            alias_object.delete()

    primary_object.save()
    return primary_object
def reassigned(obj, new):
    """ Adjusts obj after it has been re-pointed to the object new

    Called by merge_model_objects() for each related object before it is
    saved (or re-added). Raises TypeError for types that are not handled
    here. """

    if isinstance(obj, URL):
        # a URL has its parent's scope
        obj.scope = new.scope

        # append the URL after the URLs that new already has
        used_orders = [u.order for u in new.urls.all()]
        obj.order = max([-1] + used_orders) + 1
        return

    if isinstance(obj, Episode):
        # obj is an Episode, new is a podcast
        for url in obj.urls.all():
            url.scope = new.as_scope
            url.save()
        return

    if isinstance(obj, (Subscription, HistoryEntry)):
        # nothing to adjust for these types
        return

    raise TypeError('unknown type for reassigning: {objtype}'.format(
        objtype=type(obj)))
def before_delete(old, new):
    """ Prepares the deletion of old, which has been merged into new

    For episodes, the episode states are merged first. For episodes and
    podcasts, the primary key of old is then recorded as a MergedUUID of
    new. Raises TypeError for any other type. """

    if isinstance(old, Episode):
        episode_merger = EpisodeMerger(new, old, collections.Counter())
        episode_merger.merge()

    elif not isinstance(old, Podcast):
        raise TypeError('unknown type for deleting: {objtype}'.format(
            objtype=type(old)))

    # episodes and podcasts both leave their old ID behind
    MergedUUID.objects.create(
        content_type=ContentType.objects.get_for_model(new),
        object_id=new.pk,
        uuid=old.pk,
    )
def merge(moved_obj, new_target):
    """ Resolves a conflict that occurred when moving moved_obj to new_target

    Called by merge_model_objects() when saving a re-pointed object failed.
    Conflicting URLs are simply dropped; any other type raises TypeError. """

    if isinstance(moved_obj, URL):
        # if we have two conflicting URLs, don't save the second one
        # URLs don't have any interesting properties (except the URL) that
        # we could merge
        pass

    else:
        # report the type of the object that was passed in; the name used
        # here previously was undefined and raised a NameError instead
        raise TypeError('unknown type for merging: {objtype}'.format(
            objtype=type(moved_obj)))