dd39cbfb0b3c76065f554436b4346dd54b9ebb69
1 from mygpo
.api
.models
import URLSanitizingRule
, Podcast
, ToplistEntry
, SuggestionEntry
, SubscriptionAction
, SubscriptionMeta
, Subscription
, Episode
, EpisodeAction
, EpisodeToplistEntry
2 from mygpo
.api
.models
.episodes
import Chapter
3 from mygpo
.api
.models
.users
import EpisodeFavorite
4 from mygpo
.data
.models
import BackendSubscription
, Listener
, HistoricPodcastData
, PodcastTag
5 from mygpo
.log
import log
10 def sanitize_url(url
, podcast
=True, episode
=False, rules
=URLSanitizingRule
.objects
.all().order_by('priority')):
11 url
= basic_sanitizing(url
)
12 url
= apply_sanitizing_rules(url
, rules
, podcast
, episode
)
16 def basic_sanitizing(url
):
18 does basic sanitizing through urlparse and additionally converts the netloc to lowercase
20 r
= urlparse
.urlsplit(url
)
21 netloc
= r
.netloc
.lower()
22 r2
= urlparse
.SplitResult(r
.scheme
, netloc
, r
.path
, r
.query
, r
.fragment
)
25 def apply_sanitizing_rules(url
, rules
, podcast
=True, episode
=False):
27 applies all url sanitizing rules to the given url
28 setting podcast=True uses only those rules which have use_podcast set to True.
29 When passing podcast=False this check is ommitted. The same is valid
32 if podcast
: rules
= [r
for r
in rules
if r
.use_podcast
==True]
33 if episode
: rules
= [r
for r
in rules
if r
.use_episode
==True]
38 if r
.search_precompiled
:
39 url
= r
.search_precompiled
.sub(r
.replace
, url
)
41 url
= re
.sub(r
.search
, r
.replace
, url
)
44 c
= getattr(r
, 'hits', 0)
50 def maintenance(dry_run
=False):
52 This currently checks how many podcasts could be removed by
53 applying both basic sanitizing rules and those from the database.
55 This will later be used to replace podcasts!
58 print ' * %s podcasts' % Podcast
.objects
.count()
59 print ' * %s episodes' % Episode
.objects
.count()
60 print ' * %s rules' % URLSanitizingRule
.objects
.count()
62 print ' * dry run - nothing will be written to the database'
65 print 'precompiling regular expressions'
66 rules
= precompile_rules()
81 podcasts
= Podcast
.objects
.only('id', 'url').iterator()
82 total
= Podcast
.objects
.count()
87 if (count
% 1000) == 0: print '% 3.2f%% (podcast id %s)' % (((count
+ 0.0)/total
*100), p
.id)
89 su
= sanitize_url(p
.url
, rules
=rules
)
91 log('failed to sanitize url for podcast %s: %s' % (p
.id, e
))
92 print 'failed to sanitize url for podcast %s: %s' % (p
.id, e
)
101 # invalid podcast, remove
109 log('failed to delete podcast %s: %s' % (p
.id, e
))
110 print 'failed to delete podcast %s: %s' % (p
.id, e
)
116 su_podcast
= Podcast
.objects
.get(url
=su
)
118 except Podcast
.DoesNotExist
, e
:
119 # "target" podcast does not exist, we simply change the url
121 log('updating podcast %s - "%s" => "%s"' % (p
.id, p
.url
, su
))
133 # last option - merge podcasts
136 rewrite_podcasts(p
, su_podcast
)
137 tmp
= Subscription
.objects
.filter(podcast
=p
)
138 if tmp
.count() > 0: print tmp
.count()
144 log('error rewriting podcast %s: %s' % (p
.id, e
))
145 print 'error rewriting podcast %s: %s' % (p
.id, e
)
149 print 'finished %s podcasts' % count
150 print ' * %s unchanged' % p_unchanged
151 print ' * %s merged' % p_merged
152 print ' * %s updated' % p_updated
153 print ' * %s deleted' % p_deleted
154 print ' * %s error' % p_error
157 print ' * %s => %s: %s' % (r
.search
, r
.replace
, getattr(r
, 'hits', 0))
160 total
= Episode
.objects
.count()
161 episodes
= Episode
.objects
.only('id', 'url').iterator()
164 if (count
% 10000) == 0: print '% 3.2f%% (episode id %s)' % (((count
+ 0.0)/total
*100), e
.id)
166 su
= sanitize_url(e
.url
, rules
=rules
, podcast
=False, episode
=True)
167 except Exception, ex
:
168 log('failed to sanitize url for episode %s: %s' % (e
.id, ex
))
169 print 'failed to sanitize url for episode %s: %s' % (e
.id, ex
)
178 # invalid episode, remove
185 except Exception, ex
:
186 log('failed to delete episode %s: %s' % (e
.id, ex
))
187 print 'failed to delete episode %s: %s' % (e
.id, ex
)
193 su_episode
= Episode
.objects
.get(url
=su
, podcast
=e
.podcast
)
195 except Episode
.DoesNotExist
, ex
:
196 # "target" episode does not exist, we simply change the url
198 log('updating episode %s - "%s" => "%s"' % (e
.id, e
.url
, su
))
211 # last option - merge episodes
214 rewrite_episode_actions(e
, su_episode
)
215 rewrite_listeners(e
, su_episode
)
216 rewrite_chapters(e
, su_episode
)
217 rewrite_favorites(e
, su_episode
)
222 except Exception, ex
:
223 log('error rewriting episode %s: %s' % (e
.id, ex
))
224 print 'error rewriting episode %s: %s' % (e
.id, ex
)
229 print 'finished %s episodes' % count
230 print ' * %s unchanged' % e_unchanged
231 print ' * %s merged' % e_merged
232 print ' * %s updated' % e_updated
233 print ' * %s deleted' % e_deleted
234 print ' * %s error' % e_error
236 print 'finished %s podcasts' % count
237 print ' * %s unchanged' % p_unchanged
238 print ' * %s merged' % p_merged
239 print ' * %s updated' % p_updated
240 print ' * %s deleted' % p_deleted
241 print ' * %s error' % p_error
245 print ' * %s => %s: %s' % (r
.search
, r
.replace
, getattr(r
, 'hits', 0))
249 def delete_podcast(p
):
250 SubscriptionAction
.objects
.filter(podcast
=p
).delete()
251 BackendSubscription
.objects
.filter(podcast
=p
).delete()
255 def delete_episode(e
):
256 EpisodeAction
.objects
.filter(episode
=e
).delete()
257 Listener
.objects
.filter(episode
=e
).delete()
261 def rewrite_podcasts(p_old
, p_new
):
263 log('merging podcast %s "%s" to correct podcast %s "%s"' % (p_old
.id, p_old
.url
, p_new
.id, p_new
.url
))
265 # we simply delete incorrect toplist and suggestions entries,
266 # because we can't re-calculate them
267 ToplistEntry
.objects
.filter(podcast
=p_old
).delete()
268 SuggestionEntry
.objects
.filter(podcast
=p_old
).delete()
269 HistoricPodcastData
.objects
.filter(podcast
=p_old
).delete()
270 HistoricPodcastData
.objects
.filter(podcast
=p_new
).delete()
272 rewrite_episodes(p_old
, p_new
)
274 for sm
in SubscriptionMeta
.objects
.filter(podcast
=p_old
):
276 sm_new
= SubscriptionMeta
.objects
.get(user
=sm
.user
, podcast
=p_new
)
277 log('subscription meta %s (user %s, podcast %s) already exists, deleting %s (user %s, podcast %s)' % (sm_new
.id, sm
.user
.id, p_new
.id, sm
.id, sm
.user
.id, p_old
.id))
278 # meta-info already exist for the correct podcast, delete the other one
281 except SubscriptionMeta
.DoesNotExist
:
282 # meta-info for new podcast does not yet exist, update the old one
283 log('updating subscription meta %s (user %s, podcast %s => %s)' % (sm
.id, sm
.user
, p_old
.id, p_new
.id))
287 for sa
in SubscriptionAction
.objects
.filter(podcast
=p_old
):
289 log('updating subscription action %s (device %s, action %s, timestamp %s, podcast %s => %s)' % (sa
.id, sa
.device
.id, sa
.action
, sa
.timestamp
, sa
.podcast
.id, p_new
.id))
293 log('error updating subscription action %s: %s, deleting' % (sa
.id, e
))
296 for sub
in BackendSubscription
.objects
.filter(podcast
=p_old
):
298 log('updating subscription %s (device %s, user %s, since %s, podcast %s => %s)' % (sub
.id, sub
.device
.id, sub
.user
.id, sub
.subscribed_since
, p_old
.id, p_new
.id))
302 log('error updating subscription %s: %s, deleting' % (sub
.id, e
))
305 for tag
in PodcastTag
.objects
.filter(podcast
=p_old
):
307 log('updating tag %s (tag %s, source %s, podcast %s => %s)' % (tag
.id, tag
.tag
, tag
.source
, p_old
.id, p_new
.id))
311 log('error updating tag %s: %s, deleting.' % (tag
.id, e
))
315 def rewrite_episodes(p_old
, p_new
):
317 for e
in Episode
.objects
.filter(podcast
=p_old
):
319 e_new
, created_
= Episode
.objects
.get_or_create(podcast
=p_new
, url
=e
.url
)
321 log('episode %s (url %s, podcast %s) already exists; updating episode actions for episode %s (url %s, podcast %s)' % (e_new
.id, e
.url
, p_new
.id, e
.id, e
.url
, p_old
.id))
322 rewrite_episode_actions(e
, e_new
)
323 log('episode actions for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
324 rewrite_listeners(e
, e_new
)
325 log('listeners for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
326 rewrite_chapters(e
, e_new
)
327 log('chapters for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
328 rewrite_favorites(e
, e_new
)
329 log('favorites for episode %s (url "%s", podcast %s) updated, deleting.' % (e
.id, e
.url
, p_old
.id))
332 except Episode
.DoesNotExist
:
333 log('updating episode %s (url "%s", podcast %s => %s)' % (e
.id, e
.url
, p_old
.id, p_new
.id))
338 def rewrite_episode_actions(e_old
, e_new
):
340 for ea
in EpisodeAction
.objects
.filter(episode
=e_old
):
342 log('updating episode action %s (user %s, timestamp %s, episode %s => %s)' % (ea
.id, ea
.user
.id, ea
.timestamp
, e_old
.id, e_new
.id))
347 log('error updating episode action %s: %s, deleting' % (sa
.id, e
))
351 def rewrite_listeners(e_old
, e_new
):
353 for l
in Listener
.objects
.filter(episode
=e_old
):
355 log('updating listener %s (user %s, device %s, podcast %s, episode %s => %s)' % (l
.id, l
.user
.id, l
.device
.id, l
.podcast
.id, e_old
.id, e_new
.id))
357 l
.podcast
= e_new
.podcast
361 log('error updating listener %s: %s, deleting' % (l
.id, e
))
365 def rewrite_chapters(e_old
, e_new
):
367 for c
in Chapter
.objects
.filter(episode
=e_old
):
369 log('updating chapter %s (user %s, device %s, episode %s => %s)' % (c
.id, c
.device
.id, e_old
.id, e_new
.id))
374 log('error updating chapter %s: %s, deleting' % (c
.id, e
))
378 def rewrite_favorites(e_old
, e_new
):
379 for f
in EpisodeFavorite
.objects
.filter(episode
=e_old
):
381 log('updating favorite %s (user %s, episode %s => %s)' % (f
.id, f
.user
.id, e_old
.id, e_new
.id))
386 log('error updating favorite %s: %s, deleting' % (f
.id, e
))
390 def precompile_rules(rules
=URLSanitizingRule
.objects
.all().order_by('priority')):
393 r
= re
.compile(rule
.search
, re
.UNICODE
)
394 rule
.search_precompile
= r
395 rules_p
.append( rule
)