[Administration] fix merging podcasts and episodes
[mygpo.git] / mygpo / maintenance / merge.py
blob f150a0c74ef218bb045f58ecc164a59fdb3e2531
1 import collections
3 from django.db import IntegrityError
4 from django.contrib.contenttypes.models import ContentType
6 from mygpo.podcasts.models import (MergedUUID, ScopedModel, OrderedModel, Slug,
7 Tag, URL, MergedUUID, Podcast, Episode)
8 from mygpo import utils
9 from mygpo.history.models import HistoryEntry
10 from mygpo.publisher.models import PublishedPodcast
11 from mygpo.subscriptions.models import Subscription, PodcastConfig
12 from mygpo.db.couchdb.episode_state import all_episode_states, \
13 update_episode_state_object, add_episode_actions, delete_episode_state, \
14 merge_episode_states
16 import logging
17 logger = logging.getLogger(__name__)
class IncorrectMergeException(Exception):
    """ Raised when a requested merge is invalid

    The mergers in this module raise it when an object would be merged into
    itself, or when two episode states belong to different users. """
class PodcastMerger(object):
    """ Merges podcasts and their related objects

    The first podcast is the merge target; all other podcasts are merged
    into it. """

    def __init__(self, podcasts, actions, groups):
        """ Prepares to merge podcasts[1:] into podcasts[0]

        podcasts -- sequence of podcasts, the first one being the target
        actions  -- counter-like mapping; it is incremented in place for
                    every action that is carried out
        groups   -- iterable of (key, episodes) pairs; the episodes of each
                    pair are merged into the first episode of that pair

        Raises IncorrectMergeException if the same podcast is given twice.
        """

        # Keep a private copy so that neither the checks below nor merge()
        # depend on, or modify, the list owned by the caller.
        podcasts = list(podcasts)

        # Every unordered pair is compared exactly once; the first duplicate
        # found is reported.
        for n, podcast1 in enumerate(podcasts):
            for podcast2 in podcasts[n + 1:]:
                if podcast1 == podcast2:
                    raise IncorrectMergeException(
                        "can't merge podcast %s into itself %s" %
                        (podcast1.get_id(), podcast2.get_id()))

        self.podcasts = podcasts
        self.actions = actions
        self.groups = groups

    def merge(self):
        """ Carries out the actual merging and returns the target podcast

        Raises IndexError if no podcasts were given. """

        logger.info('Start merging of podcasts: %r', self.podcasts)

        # Slice instead of pop(0) so that self.podcasts stays intact
        podcast1 = self.podcasts[0]
        aliases = self.podcasts[1:]
        logger.info('Merge target: %r', podcast1)

        self.merge_episodes()

        for podcast2 in aliases:
            logger.info('Merging %r into target', podcast2)
            self.reassign_episodes(podcast1, podcast2)
            logger.info('Deleting %r', podcast2)

        merge_model_objects(podcast1, aliases)

        return podcast1

    def merge_episodes(self):
        """ Merges the episodes according to the groups

        Within each group, all episodes are merged into the first one.
        Empty groups are skipped. The group lists are left unmodified. """

        for n, episodes in self.groups:
            if not episodes:
                continue

            # merge_model_objects expects the aliases as a real list
            episode = episodes[0]
            merge_model_objects(episode, list(episodes[1:]))

    def reassign_episodes(self, podcast1, podcast2):
        """ Points the episode states of podcast2's episodes to podcast1

        Only the episode states are updated here and the action counters
        are incremented; the episodes themselves are not modified by this
        method (presumably merge_model_objects re-assigns them afterwards
        -- verify against the model relations). """

        logger.info('Re-assigning episodes of %r into %r', podcast2, podcast1)

        # re-assign episodes to new podcast
        # if necessary, they will be merged later anyway
        for e in podcast2.episode_set.all():
            self.actions['reassign-episode'] += 1

            for s in all_episode_states(e):
                self.actions['reassign-episode-state'] += 1

                update_episode_state_object(s, podcast1.get_id())
class EpisodeMerger(object):
    """ Merges one episode into another """

    def __init__(self, episode1, episode2, actions):
        """ Prepares merging episode2 into episode1

        actions is a counter-like mapping that is incremented in place.
        Raises IncorrectMergeException if both episodes are equal. """

        if episode1 == episode2:
            raise IncorrectMergeException("can't merge episode into itself")

        self.actions = actions
        self.episode1 = episode1
        self.episode2 = episode2

    def merge(self):
        """ Merges the episode states of episode2 into those of episode1 """
        target, source = self.episode1, self.episode2
        logger.info('Merging episode %r into %r', source, target)
        self.merge_states(target, source)
        logger.info('Deleting %r', source)

    def merge_states(self, episode, episode2):
        """ Merges the states of self.episode2 into those of self.episode1

        The states of both episodes are paired up per user. A state that
        exists only for episode2 is moved to episode1; if both episodes have
        a state for a user, the two are merged by an EpisodeStateMerger.
        The parameters are used for the log message only. """

        def by_user(state):
            return state.user

        target_states = sorted(all_episode_states(self.episode1), key=by_user)
        source_states = sorted(all_episode_states(self.episode2), key=by_user)

        logger.info('Merging %d episode states of %r into %r',
                    len(source_states), episode2, episode)

        pairs = utils.iterate_together([target_states, source_states],
                                       by_user)
        for state, state2 in pairs:
            # nothing to do for identical states, or if there is no state
            # of episode2 for this user
            if state == state2 or state2 is None:
                continue

            if state is None:
                # only episode2 has a state for this user: move it over
                self.actions['move-episode-state'] += 1
                update_episode_state_object(state2,
                                            self.episode1.podcast.get_id(),
                                            self.episode1.get_id())
                continue

            # both episodes have a state for this user
            EpisodeStateMerger(state, state2, self.actions).merge()
class EpisodeStateMerger(object):
    """ Merges state2 into state; both must belong to the same user """

    def __init__(self, state, state2, actions):
        """ Prepares merging state2 into state

        actions is a counter-like mapping that is incremented in place.
        Raises IncorrectMergeException if both states have the same _id or
        belong to different users. """

        if state._id == state2._id:
            raise IncorrectMergeException(
                "can't merge episode state into itself")

        if state.user != state2.user:
            raise IncorrectMergeException(
                "states don't belong to the same user")

        self.state, self.state2, self.actions = state, state2, actions

    def merge(self):
        """ Merges state2 into state, deletes state2, counts the action """
        target, obsolete = self.state, self.state2
        merge_episode_states(target, obsolete)
        delete_episode_state(obsolete)
        self.actions['merge-episode-state'] += 1
def reassign_urls(obj1, obj2):
    """ Reassigns all URLs of obj2 to obj1

    The moved URLs take over obj1's scope and are numbered consecutively,
    starting after the highest order currently used by obj1's URLs (or at 1
    if obj1 has none). A URL whose save fails with an IntegrityError is
    logged and deleted instead. """

    max_order = max([0] + [u.order for u in obj1.urls.all()])

    for n, url in enumerate(obj2.urls.all(), max_order + 1):
        url.content_object = obj1
        url.order = n
        url.scope = obj1.scope
        try:
            url.save()
        except IntegrityError as ie:
            # Logger.warn is a deprecated alias of Logger.warning; the
            # exception is passed as a lazy argument instead of str(ie)
            logger.warning('Moving URL failed: %s. Deleting.', ie)
            url.delete()
def reassign_merged_uuids(obj1, obj2):
    """ Reassigns all IDs of obj2 to obj1

    obj2's own id is recorded as a new MergedUUID of obj1, and all
    MergedUUIDs that pointed to obj2 are re-pointed to obj1. """

    # obj2's own id
    MergedUUID.objects.create(content_object=obj1, uuid=obj2.id)

    # ids that had been merged into obj2 earlier
    for merged_uuid in obj2.merged_uuids.all():
        merged_uuid.content_object = obj1
        merged_uuid.save()
def reassign_slugs(obj1, obj2):
    """ Reassigns all Slugs of obj2 to obj1

    The moved slugs take over obj1's scope and are numbered consecutively,
    starting after the highest order currently used by obj1's slugs (or at
    1 if obj1 has none). A slug whose save fails with an IntegrityError is
    logged and deleted instead. """

    max_order = max([0] + [s.order for s in obj1.slugs.all()])

    for n, slug in enumerate(obj2.slugs.all(), max_order + 1):
        slug.content_object = obj1
        slug.order = n
        slug.scope = obj1.scope
        try:
            slug.save()
        except IntegrityError as ie:
            # Logger.warn is a deprecated alias of Logger.warning; the
            # exception is passed as a lazy argument instead of str(ie)
            logger.warning('Moving Slug failed: %s. Deleting', ie)
            slug.delete()
188 from django.db import transaction
189 from django.db.models import get_models, Model
190 from django.contrib.contenttypes.generic import GenericForeignKey
@transaction.commit_on_success
def merge_model_objects(primary_object, alias_objects=None, keep_old=False):
    """
    Use this function to merge model objects (i.e. Users, Organizations, Polls,
    etc.) and migrate all of the related fields from the alias objects to the
    primary object.

    primary_object -- the model instance that survives the merge
    alias_objects  -- a list of instances (or a single instance) of the same
                      class; they are merged into primary_object
    keep_old       -- if False (default), before_delete() is called for each
                      alias object, which is then deleted

    reassigned() is called for every related object right before it is
    pointed to the primary object. Fields of the primary object that are
    None or '' are filled with the first non-blank value found in the alias
    objects. The primary object is saved and returned.

    Raises TypeError if primary_object is not a Model instance or if an
    alias object is not an instance of primary_object's class.

    Usage:
    from django.contrib.auth.models import User
    primary_user = User.objects.get(email='good_email@example.com')
    duplicate_user = User.objects.get(email='good_email+duplicate@example.com')
    merge_model_objects(primary_user, duplicate_user)
    """
    # None instead of a shared mutable [] as the default value
    if alias_objects is None:
        alias_objects = []
    elif not isinstance(alias_objects, list):
        alias_objects = [alias_objects]

    # check that all aliases are the same class as primary one and that
    # they are subclass of model
    primary_class = primary_object.__class__

    if not issubclass(primary_class, Model):
        raise TypeError('Only django.db.models.Model subclasses can be merged')

    for alias_object in alias_objects:
        if not isinstance(alias_object, primary_class):
            raise TypeError('Only models of same class can be merged')

    # Get a list of all GenericForeignKeys in all models
    # TODO: this is a bit of a hack, since the generics framework should
    # provide a similar method to the ForeignKey field for accessing the
    # generic related fields.
    generic_fields = [
        field
        for model in get_models()
        for field in model.__dict__.values()
        if isinstance(field, GenericForeignKey)
    ]

    # Names of the primary object's own fields that are currently blank
    blank_local_fields = set(
        field.attname
        for field in primary_object._meta.local_fields
        if getattr(primary_object, field.attname) in [None, '']
    )

    # Loop through all alias objects and migrate their data to the primary
    # object.
    for alias_object in alias_objects:
        # Migrate all foreign key references from alias object to primary
        # object.
        for related_object in alias_object._meta.get_all_related_objects():
            # The variable name on the alias_object model.
            alias_varname = related_object.get_accessor_name()
            # The variable name on the related model.
            obj_varname = related_object.field.name
            related_objects = getattr(alias_object, alias_varname)
            for obj in related_objects.all():
                setattr(obj, obj_varname, primary_object)
                reassigned(obj, primary_object)
                obj.save()

        # Migrate all many to many references from alias object to primary
        # object.
        related_m2m = alias_object._meta.get_all_related_many_to_many_objects()
        for related_many_object in related_m2m:
            alias_varname = related_many_object.get_accessor_name()
            obj_varname = related_many_object.field.name

            if alias_varname is not None:
                # standard case
                related_many_objects = getattr(
                    alias_object, alias_varname).all()
            else:
                # special case, symmetrical relation, no reverse accessor
                related_many_objects = getattr(
                    alias_object, obj_varname).all()
            for obj in related_many_objects.all():
                getattr(obj, obj_varname).remove(alias_object)
                reassigned(obj, primary_object)
                getattr(obj, obj_varname).add(primary_object)

        # Migrate all generic foreign key references from alias object to
        # primary object.
        for field in generic_fields:
            filter_kwargs = {}
            filter_kwargs[field.fk_field] = alias_object._get_pk_val()
            filter_kwargs[field.ct_field] = field.get_content_type(
                alias_object)
            matches = field.model.objects.filter(**filter_kwargs)
            for generic_related_object in matches:
                setattr(generic_related_object, field.name, primary_object)
                reassigned(generic_related_object, primary_object)
                generic_related_object.save()

        # Try to fill all missing values in primary object by values of
        # duplicates
        filled_up = set()
        for field_name in blank_local_fields:
            val = getattr(alias_object, field_name)
            if val not in [None, '']:
                setattr(primary_object, field_name, val)
                filled_up.add(field_name)
        # a field filled once is not overwritten by later alias objects
        blank_local_fields -= filled_up

        if not keep_old:
            before_delete(alias_object, primary_object)
            alias_object.delete()

    primary_object.save()
    return primary_object
285 # https://djangosnippets.org/snippets/2283/
def reassigned(obj, new):
    """ Adjusts obj right before it is re-assigned to its new owner

    URLs take over the scope of the new owner and are ordered after its
    existing URLs. For an Episode, the scope of all its URLs is updated and
    saved. Subscriptions and HistoryEntries need no adjustment. Any other
    type raises a TypeError. """

    if isinstance(obj, URL):
        # a URL has its parent's scope
        obj.scope = new.scope

        # append after the URLs the new owner already has
        taken_orders = [u.order for u in new.urls.all()]
        obj.order = max([-1] + taken_orders) + 1
        return

    if isinstance(obj, Episode):
        # obj is an Episode, new is a podcast
        for url in obj.urls.all():
            url.scope = new.as_scope
            url.save()
        return

    if isinstance(obj, (Subscription, HistoryEntry)):
        # nothing to adjust
        return

    raise TypeError('unknown type for reassigning: {objtype}'
                    .format(objtype=type(obj)))
def before_delete(old, new):
    """ Prepares the deletion of old, which has been merged into new

    For an Episode, the episode states of old are merged into new first.
    For both Episodes and Podcasts, the id of old is recorded as a
    MergedUUID of new. Any other type raises a TypeError. """

    if isinstance(old, Episode):
        # the counter is local to this call and discarded afterwards
        EpisodeMerger(new, old, collections.Counter()).merge()

    elif not isinstance(old, Podcast):
        raise TypeError('unknown type for deleting: {objtype}'
                        .format(objtype=type(old)))

    # Episodes and Podcasts record the id of the deleted object
    MergedUUID.objects.create(
        content_type=ContentType.objects.get_for_model(new),
        object_id=new.pk,
        uuid=old.pk,
    )