Update to Django 1.9.1
[mygpo.git] / mygpo / maintenance / merge.py
blob5d486df962a4494d8a613a710f2fdf2bbc5a9947
1 import collections
3 from django.db import transaction, IntegrityError
4 from django.contrib.contenttypes.models import ContentType
5 from django.db.models import Model
6 from django.apps import apps
7 from django.contrib.contenttypes.fields import GenericForeignKey
9 from mygpo.podcasts.models import (MergedUUID, ScopedModel, OrderedModel, Slug,
10 Tag, URL, MergedUUID, Podcast, Episode)
11 from mygpo import utils
12 from mygpo.history.models import HistoryEntry, EpisodeHistoryEntry
13 from mygpo.publisher.models import PublishedPodcast
14 from mygpo.subscriptions.models import Subscription
16 import logging
17 logger = logging.getLogger(__name__)
# PostgreSQL error code (SQLSTATE) reported for unique constraint violations;
# compared against the ``pgcode`` of the database error behind an
# IntegrityError.
# NOTE(review): psycopg2 exposes ``pgcode`` as a string ('23505'), while
# this constant is an int -- confirm that comparisons account for that.
PG_UNIQUE_VIOLATION = 23505
class IncorrectMergeException(Exception):
    """ Raised when a requested merge is invalid

    PodcastMerger raises it when the same podcast is passed more than
    once, i.e. a podcast would be merged into itself. """
class PodcastMerger(object):
    """ Merges podcasts and their related objects """

    def __init__(self, podcasts, actions, groups):
        """ Prepares to merge podcasts[1:] into podcasts[0]

        podcasts is the list of podcasts; the first one is the merge
        target. actions is stored but not used by this class. groups is an
        iterable of (key, episodes) pairs; within each pair episodes[1:]
        are merged into episodes[0].

        Raises IncorrectMergeException if the same podcast is contained
        more than once in podcasts. """

        # compare each pair only once; equality is expected to be symmetric
        for n, podcast1 in enumerate(podcasts):
            for podcast2 in podcasts[n + 1:]:
                if podcast1 == podcast2:
                    raise IncorrectMergeException(
                        "can't merge podcast %s into itself %s" %
                        (podcast1.get_id(), podcast2.get_id()))

        self.podcasts = podcasts
        self.actions = actions
        self.groups = groups

    def merge(self):
        """ Carries out the actual merging

        Returns the podcast that all other podcasts have been merged into.
        The list passed to the constructor is left unmodified. """

        logger.info('Start merging of podcasts: %r', self.podcasts)

        # don't pop() from self.podcasts: that would silently modify the
        # caller's list and change the target on a repeated call
        podcast1 = self.podcasts[0]
        aliases = list(self.podcasts[1:])
        logger.info('Merge target: %r', podcast1)

        self.merge_episodes()
        merge_model_objects(podcast1, aliases)

        return podcast1

    def merge_episodes(self):
        """ Merges the episodes according to the groups

        Empty groups are skipped; the episode lists of the groups are
        left unmodified. """

        for _, episodes in self.groups:
            if not episodes:
                continue

            episode = episodes[0]
            aliases = list(episodes[1:])
            logger.info('Merging %d episodes', len(aliases))
            merge_model_objects(episode, aliases)
def reassign_urls(obj1, obj2):
    """ Reassigns all URLs of obj2 to obj1

    The moved URLs are numbered consecutively after the highest order
    among obj1's existing URLs (starting at 1 if obj1 has none) and take
    over obj1's scope. A URL whose save() raises an IntegrityError is
    deleted instead of being moved. """
    max_order = max([0] + [u.order for u in obj1.urls.all()])

    for n, url in enumerate(obj2.urls.all(), max_order + 1):
        url.content_object = obj1
        url.order = n
        url.scope = obj1.scope
        try:
            url.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of warning(); the
            # exception is passed as a lazy %-argument
            logger.warning('Moving URL failed: %s. Deleting.', ie)
            url.delete()
def reassign_merged_uuids(obj1, obj2):
    """ Reassigns all IDs of obj2 to obj1

    obj2's own ID is recorded as a merged UUID of obj1, and every merged
    UUID that obj2 already had is re-pointed to obj1. """
    # remember obj2's own ID as an alias of obj1
    MergedUUID.objects.create(uuid=obj2.id, content_object=obj1)

    # hand over the IDs that were merged into obj2 earlier
    for merged_uuid in obj2.merged_uuids.all():
        merged_uuid.content_object = obj1
        merged_uuid.save()
def reassign_slugs(obj1, obj2):
    """ Reassigns all Slugs of obj2 to obj1

    The moved slugs are numbered consecutively after the highest order
    among obj1's existing slugs (starting at 1 if obj1 has none) and take
    over obj1's scope. A slug whose save() raises an IntegrityError is
    deleted instead of being moved. """
    max_order = max([0] + [s.order for s in obj1.slugs.all()])

    for n, slug in enumerate(obj2.slugs.all(), max_order + 1):
        slug.content_object = obj1
        slug.order = n
        slug.scope = obj1.scope
        try:
            slug.save()
        except IntegrityError as ie:
            # Logger.warn() is a deprecated alias of warning(); the
            # exception is passed as a lazy %-argument
            logger.warning('Moving Slug failed: %s. Deleting', ie)
            slug.delete()
# based on https://djangosnippets.org/snippets/2283/
@transaction.atomic
def merge_model_objects(primary_object, alias_objects=[], keep_old=False):
    """
    Use this function to merge model objects (i.e. Users, Organizations, Polls,
    etc.) and migrate all of the related fields from the alias objects to the
    primary object.

    Usage:
    from django.contrib.auth.models import User
    primary_user = User.objects.get(email='good_email@example.com')
    duplicate_user = User.objects.get(email='good_email+duplicate@example.com')
    merge_model_objects(primary_user, duplicate_user)

    primary_object is the object that survives the merge. alias_objects is
    either a single object or a list of objects that are merged into it;
    the argument is never modified, so the shared default list is harmless.
    Unless keep_old is set, each alias object is deleted after its related
    objects have been moved.

    Foreign key, many-to-many and generic foreign key references to an
    alias are re-pointed to the primary object, and fields that are blank
    (None or '') on the primary object are filled from the aliases.

    Returns the saved primary object. Raises TypeError if primary_object
    is not a Django model instance or if an alias is not an instance of
    primary_object's class. The whole merge runs in one transaction.
    """
    if not isinstance(alias_objects, list):
        alias_objects = [alias_objects]

    # check that all aliases are the same class as primary one and that
    # they are subclass of model
    primary_class = primary_object.__class__

    if not issubclass(primary_class, Model):
        raise TypeError('Only django.db.models.Model subclasses can be merged')

    for alias_object in alias_objects:
        if not isinstance(alias_object, primary_class):
            raise TypeError('Only models of same class can be merged')

    # Get a list of all GenericForeignKeys in all models
    # TODO: this is a bit of a hack, since the generics framework should
    # provide a similar method to the ForeignKey field for accessing the
    # generic related fields.
    generic_fields = [
        attr
        for model in apps.get_models()
        for attr in model.__dict__.values()
        if isinstance(attr, GenericForeignKey)
    ]

    blank_local_fields = {
        field.attname
        for field in primary_object._meta.local_fields
        if getattr(primary_object, field.attname) in [None, '']
    }

    # Loop through all alias objects and migrate their data to
    # the primary object.
    for alias_object in alias_objects:
        # Migrate all foreign key references from alias object to
        # primary object.
        # NOTE(review): get_all_related_objects() and
        # get_all_related_many_to_many_objects() belong to the old _meta
        # API; check Django's "migrating from the old API" notes before
        # upgrading Django.
        for related_object in alias_object._meta.get_all_related_objects():
            # The variable name on the alias_object model.
            alias_varname = related_object.get_accessor_name()
            # The variable name on the related model.
            obj_varname = related_object.field.name
            related_objects = getattr(alias_object, alias_varname)
            for obj in related_objects.all():
                setattr(obj, obj_varname, primary_object)
                reassigned(obj, primary_object)
                obj.save()

        # Migrate all many to many references from alias object to
        # primary object.
        related = alias_object._meta.get_all_related_many_to_many_objects()
        for related_many_object in related:
            alias_varname = related_many_object.get_accessor_name()
            obj_varname = related_many_object.field.name

            if alias_varname is not None:
                # standard case
                related_many_objects = getattr(alias_object,
                                               alias_varname).all()
            else:
                # special case, symmetrical relation, no reverse accessor
                related_many_objects = getattr(alias_object,
                                               obj_varname).all()
            for obj in related_many_objects:
                getattr(obj, obj_varname).remove(alias_object)
                reassigned(obj, primary_object)
                getattr(obj, obj_varname).add(primary_object)

        # Migrate all generic foreign key references from alias
        # object to primary object.
        for field in generic_fields:
            filter_kwargs = {
                field.fk_field: alias_object._get_pk_val(),
                field.ct_field: field.get_content_type(alias_object),
            }
            related = field.model.objects.filter(**filter_kwargs)
            for generic_related_object in related:
                setattr(generic_related_object, field.name, primary_object)
                reassigned(generic_related_object, primary_object)
                try:
                    # execute save in a savepoint, so we can resume in the
                    # transaction
                    with transaction.atomic():
                        generic_related_object.save()
                except IntegrityError as ie:
                    # the driver reports the error code as a string
                    # (e.g. '23505'), so compare string representations;
                    # __cause__ may be missing or carry no pgcode at all
                    pgcode = getattr(ie.__cause__, 'pgcode', None)
                    if str(pgcode) == str(PG_UNIQUE_VIOLATION):
                        merge(generic_related_object, primary_object)
                    else:
                        # keep going as before, but leave a trace instead
                        # of dropping the error silently
                        logger.warning(
                            'Moving %r to %r failed: %s. Skipping.',
                            generic_related_object, primary_object, ie)

        # Try to fill all missing values in primary object by
        # values of duplicates
        filled_up = set()
        for field_name in blank_local_fields:
            val = getattr(alias_object, field_name)
            if val not in [None, '']:
                setattr(primary_object, field_name, val)
                filled_up.add(field_name)
        blank_local_fields -= filled_up

        if not keep_old:
            before_delete(alias_object, primary_object)
            alias_object.delete()

    primary_object.save()
    return primary_object
def reassigned(obj, new):
    """ Adjusts obj after its reference has been changed to new

    A URL takes over the scope of new and is ordered after the URLs new
    already has. For an Episode (new is its podcast) the scope of all its
    URLs is set to new.as_scope and the URLs are saved. Subscriptions and
    history entries need no adjustment.

    Raises TypeError for any other type of obj. """

    if isinstance(obj, URL):
        # a URL has its parent's scope
        obj.scope = new.scope

        # place the URL behind the ones the new parent already has
        highest = max([-1] + [existing.order for existing in new.urls.all()])
        obj.order = highest + 1
        return

    if isinstance(obj, Episode):
        # obj is an Episode, new is a podcast
        for episode_url in obj.urls.all():
            episode_url.scope = new.as_scope
            episode_url.save()
        return

    if isinstance(obj, (Subscription, EpisodeHistoryEntry, HistoryEntry)):
        # nothing besides the reference itself has to change
        return

    raise TypeError('unknown type for reassigning: {objtype}'.format(
        objtype=type(obj)))
def before_delete(old, new):
    """ Records the ID of old as a merged UUID of new

    Supported for Episodes and Podcasts; raises TypeError for any other
    type of old. """

    if not isinstance(old, (Episode, Podcast)):
        raise TypeError('unknown type for deleting: {objtype}'.format(
            objtype=type(old)))

    # Episodes and Podcasts are handled the same way: the old primary key
    # is stored as a MergedUUID that points to the new object
    MergedUUID.objects.create(
        content_type=ContentType.objects.get_for_model(new),
        object_id=new.pk,
        uuid=old.pk,
    )
def merge(moved_obj, new_target):
    """ Resolves a conflict that occurred when moving moved_obj to new_target

    Only URLs are supported, and for them nothing needs to be done.

    Raises TypeError for any other type of moved_obj. """
    if isinstance(moved_obj, URL):
        # if we have two conflicting URLs, don't save the second one
        # URLs don't have any interesting properties (except the URL) that
        # we could merge
        pass

    else:
        # report the type of the object that was passed in; the name `old`
        # used here before is not defined in this function
        raise TypeError('unknown type for merging: {objtype}'.format(
            objtype=type(moved_obj)))