[Podcasts] fix merging Episodes with same URL
[mygpo.git] / mygpo / maintenance / merge.py
blob65dfc102e4c1b6fea3c61767ecc95eb45b52e81a
1 import collections
3 from django.db import IntegrityError
4 from django.contrib.contenttypes.models import ContentType
6 from mygpo.podcasts.models import (MergedUUID, ScopedModel, OrderedModel, Slug,
7 Tag, URL, MergedUUID, Podcast, Episode)
8 from mygpo import utils
9 from mygpo.history.models import HistoryEntry
10 from mygpo.publisher.models import PublishedPodcast
11 from mygpo.subscriptions.models import Subscription, PodcastConfig
12 from mygpo.db.couchdb.episode_state import all_episode_states, \
13 update_episode_state_object, add_episode_actions, delete_episode_state, \
14 merge_episode_states
16 import logging
17 logger = logging.getLogger(__name__)
20 PG_UNIQUE_VIOLATION = 23505
class IncorrectMergeException(Exception):
    """ Raised when a requested merge is invalid

    The mergers in this module raise it, for example, when an object is
    asked to be merged into itself. """
class PodcastMerger(object):
    """ Merges podcasts and their related objects """

    def __init__(self, podcasts, actions, groups):
        """ Prepares to merge podcasts[1:] into podcasts[0]

        podcasts is the list of podcasts, the first one being the merge
        target. actions is a counter-like mapping in which the performed
        actions are counted. groups is an iterable of (key, episodes)
        pairs; the episodes of each pair are merged into one.

        Raises IncorrectMergeException if two different positions of
        podcasts hold equal podcasts. """

        for pos1, first in enumerate(podcasts):
            for pos2, second in enumerate(podcasts):
                if pos1 == pos2:
                    # an entry may of course be equal to itself
                    continue

                if first == second:
                    raise IncorrectMergeException(
                        "can't merge podcast %s into itself %s" %
                        (first.get_id(), second.get_id()))

        self.podcasts = podcasts
        self.actions = actions
        self.groups = groups

    def merge(self):
        """ Carries out the actual merging

        The first podcast is removed from self.podcasts and used as the
        merge target; all remaining podcasts are merged into it. Returns
        the merge target. """

        logger.info('Start merging of podcasts: %r', self.podcasts)

        target = self.podcasts.pop(0)
        logger.info('Merge target: %r', target)

        self.merge_episodes()

        for source in self.podcasts:
            logger.info('Merging %r into target', source)
            self.reassign_episodes(target, source)
            logger.info('Deleting %r', source)

        # moves the related objects of all remaining podcasts to the
        # target and removes the merged podcasts
        merge_model_objects(target, self.podcasts)

        return target

    def merge_episodes(self):
        """ Merges the episodes according to the groups

        Within each non-empty group the first episode is taken out of the
        list and the remaining ones are merged into it. """

        for _, episodes in self.groups:
            if episodes:
                target = episodes.pop(0)
                merge_model_objects(target, episodes)

    def reassign_episodes(self, podcast1, podcast2):
        """ Points the episode states of podcast2's episodes to podcast1 """

        logger.info('Re-assigning episodes of %r into %r', podcast2, podcast1)

        # re-assign episodes to new podcast
        # if necessary, they will be merged later anyway
        for episode in podcast2.episode_set.all():
            self.actions['reassign-episode'] += 1

            for state in all_episode_states(episode):
                self.actions['reassign-episode-state'] += 1

                update_episode_state_object(state, podcast1.get_id())
class EpisodeMerger(object):
    """ Merges two episodes """

    def __init__(self, episode1, episode2, actions):
        """ episode2 will be merged into episode1

        actions is a counter-like mapping in which the performed actions
        are counted. Raises IncorrectMergeException if both episodes are
        equal. """

        if episode1 == episode2:
            raise IncorrectMergeException("can't merge episode into itself")

        self.episode1 = episode1
        self.episode2 = episode2
        self.actions = actions

    def merge(self):
        """ Merges the episode states of episode2 into those of episode1 """
        logger.info('Merging episode %r into %r', self.episode2, self.episode1)
        self.merge_states(self.episode1, self.episode2)
        logger.info('Deleting %r', self.episode2)

    def merge_states(self, episode, episode2):
        """ Merges the states of self.episode2 into those of self.episode1

        The parameters episode and episode2 are only used for logging; the
        states are always taken from self.episode1 and self.episode2. """

        def by_user(state):
            return state.user

        target_states = sorted(all_episode_states(self.episode1), key=by_user)
        source_states = sorted(all_episode_states(self.episode2), key=by_user)

        logger.info('Merging %d episode states of %r into %r',
                    len(source_states), episode2, episode)

        pairs = utils.iterate_together([target_states, source_states], by_user)
        for state, state2 in pairs:
            if state == state2 or state2 is None:
                # nothing to merge into the target state
                continue

            if state is None:
                # only the source has a state for this user: move it
                self.actions['move-episode-state'] += 1
                update_episode_state_object(state2,
                                            self.episode1.podcast.get_id(),
                                            self.episode1.get_id())

            else:
                # both episodes have a state for this user: merge them
                esm = EpisodeStateMerger(state, state2, self.actions)
                esm.merge()
class EpisodeStateMerger(object):
    """ Merges state2 in state """

    def __init__(self, state, state2, actions):
        """ Prepares to merge state2 into state

        actions is a counter-like mapping in which the performed actions
        are counted. Raises IncorrectMergeException if both states have
        the same _id or if they belong to different users. """

        same_state = state._id == state2._id
        if same_state:
            raise IncorrectMergeException(
                "can't merge episode state into itself")

        same_user = state.user == state2.user
        if not same_user:
            raise IncorrectMergeException(
                "states don't belong to the same user")

        self.state = state
        self.state2 = state2
        self.actions = actions

    def merge(self):
        """ Merges state2 into state, deletes state2 and counts the merge """
        merge_episode_states(self.state, self.state2)
        delete_episode_state(self.state2)
        self.actions['merge-episode-state'] += 1
def reassign_urls(obj1, obj2):
    """ Reassigns all URLs of obj2 to obj1

    The moved URLs are appended after the existing URLs of obj1 (their
    order continues after the highest existing order) and take over the
    scope of obj1. A URL that can not be saved because of an
    IntegrityError is deleted instead. """

    max_order = max([0] + [u.order for u in obj1.urls.all()])

    for n, url in enumerate(obj2.urls.all(), max_order+1):
        url.content_object = obj1
        url.order = n
        url.scope = obj1.scope
        try:
            url.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of Logger.warning()
            logger.warning('Moving URL failed: %s. Deleting.', ie)
            url.delete()
def reassign_merged_uuids(obj1, obj2):
    """ Reassigns all IDs of obj2 to obj1

    The ID of obj2 itself is recorded as a merged UUID of obj1, and all
    merged UUIDs that obj2 already has are moved to obj1. """

    # obj2's own ID becomes a merged UUID of obj1
    MergedUUID.objects.create(uuid=obj2.id, content_object=obj1)

    # IDs that were merged into obj2 earlier are moved as well
    for merged_uuid in obj2.merged_uuids.all():
        merged_uuid.content_object = obj1
        merged_uuid.save()
def reassign_slugs(obj1, obj2):
    """ Reassigns all Slugs of obj2 to obj1

    The moved slugs are appended after the existing slugs of obj1 (their
    order continues after the highest existing order) and take over the
    scope of obj1. A slug that can not be saved because of an
    IntegrityError is deleted instead. """

    max_order = max([0] + [s.order for s in obj1.slugs.all()])

    for n, slug in enumerate(obj2.slugs.all(), max_order+1):
        slug.content_object = obj1
        slug.order = n
        slug.scope = obj1.scope
        try:
            slug.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of Logger.warning()
            logger.warning('Moving Slug failed: %s. Deleting', ie)
            slug.delete()
191 from django.db import transaction
192 from django.db.models import get_models, Model
193 from django.contrib.contenttypes.generic import GenericForeignKey
@transaction.commit_on_success
def merge_model_objects(primary_object, alias_objects=None, keep_old=False):
    """
    Use this function to merge model objects (i.e. Users, Organizations, Polls,
    etc.) and migrate all of the related fields from the alias objects to the
    primary object.

    Usage:
    from django.contrib.auth.models import User
    primary_user = User.objects.get(email='good_email@example.com')
    duplicate_user = User.objects.get(email='good_email+duplicate@example.com')
    merge_model_objects(primary_user, duplicate_user)

    primary_object is the model instance that survives the merge.
    alias_objects is a single instance or a list of instances of the same
    class that are merged into it; if omitted nothing is merged and the
    primary object is only saved. Unless keep_old is true, every alias is
    passed to before_delete() and deleted after its data has been migrated.

    Returns the saved primary object. Raises TypeError if primary_object is
    not a Model instance or if an alias is of a different class.
    """
    if alias_objects is None:
        # avoid a mutable default argument
        alias_objects = []

    if not isinstance(alias_objects, list):
        alias_objects = [alias_objects]

    # check that all aliases are the same class as primary one and that
    # they are subclass of model
    primary_class = primary_object.__class__

    if not issubclass(primary_class, Model):
        raise TypeError('Only django.db.models.Model subclasses can be merged')

    for alias_object in alias_objects:
        if not isinstance(alias_object, primary_class):
            raise TypeError('Only models of same class can be merged')

    # Get a list of all GenericForeignKeys in all models
    # TODO: this is a bit of a hack, since the generics framework should
    # provide a similar method to the ForeignKey field for accessing the
    # generic related fields.
    generic_fields = []
    for model in get_models():
        # values() instead of iteritems() works on Python 2 and 3 alike
        generic_fields.extend(
            field for field in model.__dict__.values()
            if isinstance(field, GenericForeignKey))

    # local fields of the primary object that have no value yet; they are
    # filled from the aliases below
    blank_local_fields = set(
        field.attname for field in primary_object._meta.local_fields
        if getattr(primary_object, field.attname) in [None, ''])

    # Loop through all alias objects and migrate their data to the primary
    # object.
    for alias_object in alias_objects:
        # Migrate all foreign key references from alias object to primary
        # object.
        for related_object in alias_object._meta.get_all_related_objects():
            # The variable name on the alias_object model.
            alias_varname = related_object.get_accessor_name()
            # The variable name on the related model.
            obj_varname = related_object.field.name
            related_objects = getattr(alias_object, alias_varname)
            for obj in related_objects.all():
                setattr(obj, obj_varname, primary_object)
                reassigned(obj, primary_object)
                obj.save()

        # Migrate all many to many references from alias object to primary
        # object.
        related_many = alias_object._meta.get_all_related_many_to_many_objects()
        for related_many_object in related_many:
            alias_varname = related_many_object.get_accessor_name()
            obj_varname = related_many_object.field.name

            if alias_varname is not None:
                # standard case
                related_many_objects = getattr(alias_object,
                                               alias_varname).all()
            else:
                # special case, symmetrical relation, no reverse accessor
                related_many_objects = getattr(alias_object,
                                               obj_varname).all()
            for obj in related_many_objects.all():
                getattr(obj, obj_varname).remove(alias_object)
                reassigned(obj, primary_object)
                getattr(obj, obj_varname).add(primary_object)

        # Migrate all generic foreign key references from alias object to
        # primary object.
        for field in generic_fields:
            filter_kwargs = {}
            filter_kwargs[field.fk_field] = alias_object._get_pk_val()
            filter_kwargs[field.ct_field] = field.get_content_type(alias_object)
            generic_related = field.model.objects.filter(**filter_kwargs)
            for generic_related_object in generic_related:
                setattr(generic_related_object, field.name, primary_object)
                reassigned(generic_related_object, primary_object)
                try:
                    # execute save in a savepoint, so we can resume in the
                    # transaction
                    with transaction.atomic():
                        generic_related_object.save()
                except IntegrityError as ie:
                    # The driver's error code may be a string (psycopg2
                    # documents pgcode as a string) while the constant is
                    # an int, so both sides are normalised before they are
                    # compared. The cause may also be missing entirely.
                    cause = getattr(ie, '__cause__', None)
                    pgcode = getattr(cause, 'pgcode', None)
                    if str(pgcode) == str(PG_UNIQUE_VIOLATION):
                        merge(generic_related_object, primary_object)
                    else:
                        # other integrity errors are still not fatal, but
                        # they are no longer dropped without a trace
                        logger.warning(
                            'Could not move %r to %r: %s',
                            generic_related_object, primary_object, ie)

        # Try to fill all missing values in primary object by values of
        # duplicates
        filled_up = set()
        for field_name in blank_local_fields:
            val = getattr(alias_object, field_name)
            if val not in [None, '']:
                setattr(primary_object, field_name, val)
                filled_up.add(field_name)
        blank_local_fields -= filled_up

        if not keep_old:
            before_delete(alias_object, primary_object)
            alias_object.delete()

    primary_object.save()
    return primary_object
295 # https://djangosnippets.org/snippets/2283/
def reassigned(obj, new):
    """ Adjusts obj after it has been re-assigned to new

    A URL takes over the scope of new and is ordered after the URLs new
    already has. For an Episode the scope of all its URLs is set to
    new.as_scope and the URLs are saved. Subscriptions and history entries
    need no adjustment. Raises TypeError for any other type. """

    if isinstance(obj, URL):
        # a URL has its parent's scope
        obj.scope = new.scope

        # append the URL after the ones that already exist
        highest_order = max([-1] + [u.order for u in new.urls.all()])
        obj.order = highest_order + 1
        return

    if isinstance(obj, Episode):
        # obj is an Episode, new is a podcast
        for url in obj.urls.all():
            url.scope = new.as_scope
            url.save()
        return

    if isinstance(obj, (Subscription, HistoryEntry)):
        # nothing to adjust
        return

    raise TypeError('unknown type for reassigning: {objtype}'
                    .format(objtype=type(obj)))
def before_delete(old, new):
    """ Prepares the deletion of old, which has been merged into new

    For an Episode the episode states of old are merged into those of new
    first. For both Episodes and Podcasts the primary key of old is then
    recorded as a MergedUUID of new. Raises TypeError for any other type. """

    if isinstance(old, Episode):
        merger = EpisodeMerger(new, old, collections.Counter())
        merger.merge()

    elif not isinstance(old, Podcast):
        raise TypeError('unknown type for deleting: {objtype}'
                        .format(objtype=type(old)))

    # the old ID is recorded as a merged UUID of the new object
    MergedUUID.objects.create(
        content_type=ContentType.objects.get_for_model(new),
        object_id=new.pk,
        uuid=old.pk,
    )
def merge(moved_obj, new_target):
    """ Resolves a conflict that occurred when moving moved_obj to new_target

    Only URLs are supported, and for them nothing has to be done. Raises
    TypeError for any other type. """

    if isinstance(moved_obj, URL):
        # if we have two conflicting URLs, don't save the second one
        # URLs don't have any interesting properties (except the URL) that
        # we could merge
        return

    # report the type of the object that was passed in; the name "old"
    # used here before is not defined in this function and caused a
    # NameError instead of the intended TypeError
    raise TypeError('unknown type for merging: {objtype}'
                    .format(objtype=type(moved_obj)))