1 from mygpo
.api
.models
import URLSanitizingRule
, Podcast
, ToplistEntry
, SuggestionEntry
, SubscriptionAction
, SubscriptionMeta
, Subscription
, Episode
, EpisodeAction
2 from mygpo
.api
.models
.episodes
import Chapter
3 from mygpo
.api
.models
.users
import EpisodeFavorite
4 from mygpo
.data
.models
import BackendSubscription
, Listener
, HistoricPodcastData
, PodcastTag
5 from mygpo
.log
import log
9 def sanitize_url(url
, podcast
=True, episode
=False, rules
=URLSanitizingRule
.objects
.all().order_by('priority')):
10 url
= basic_sanitizing(url
)
11 url
= apply_sanitizing_rules(url
, rules
, podcast
, episode
)
15 def basic_sanitizing(url
):
17 does basic sanitizing through urlparse and additionally converts the netloc to lowercase
19 r
= urlparse
.urlsplit(url
)
20 netloc
= r
.netloc
.lower()
21 r2
= urlparse
.SplitResult(r
.scheme
, netloc
, r
.path
, r
.query
, r
.fragment
)
24 def apply_sanitizing_rules(url
, rules
, podcast
=True, episode
=False):
26 applies all url sanitizing rules to the given url
27 setting podcast=True uses only those rules which have use_podcast set to True.
28 When passing podcast=False this check is ommitted. The same is valid
31 if podcast
: rules
= [r
for r
in rules
if r
.use_podcast
==True]
32 if episode
: rules
= [r
for r
in rules
if r
.use_episode
==True]
37 if r
.search_precompiled
:
38 url
= r
.search_precompiled
.sub(r
.replace
, url
)
40 url
= re
.sub(r
.search
, r
.replace
, url
)
43 c
= getattr(r
, 'hits', 0)
49 def maintenance(dry_run
=False):
51 This currently checks how many podcasts could be removed by
52 applying both basic sanitizing rules and those from the database.
54 This will later be used to replace podcasts!
57 print ' * %s podcasts' % Podcast
.objects
.count()
58 print ' * %s episodes' % Episode
.objects
.count()
59 print ' * %s rules' % URLSanitizingRule
.objects
.count()
61 print ' * dry run - nothing will be written to the database'
64 print 'precompiling regular expressions'
65 rules
= precompile_rules()
80 podcasts
= Podcast
.objects
.only('id', 'url').iterator()
81 total
= Podcast
.objects
.count()
86 if (count
% 1000) == 0: print '% 3.2f%% (podcast id %s)' % (((count
+ 0.0)/total
*100), p
.id)
88 su
= sanitize_url(p
.url
, rules
=rules
)
90 log('failed to sanitize url for podcast %s: %s' % (p
.id, e
))
91 print 'failed to sanitize url for podcast %s: %s' % (p
.id, e
)
100 # invalid podcast, remove
108 log('failed to delete podcast %s: %s' % (p
.id, e
))
109 print 'failed to delete podcast %s: %s' % (p
.id, e
)
115 su_podcast
= Podcast
.objects
.get(url
=su
)
117 except Podcast
.DoesNotExist
, e
:
118 # "target" podcast does not exist, we simply change the url
120 log('updating podcast %s - "%s" => "%s"' % (p
.id, p
.url
, su
))
132 # last option - merge podcasts
135 rewrite_podcasts(p
, su_podcast
)
136 tmp
= Subscription
.objects
.filter(podcast
=p
)
137 if tmp
.count() > 0: print tmp
.count()
143 log('error rewriting podcast %s: %s' % (p
.id, e
))
144 print 'error rewriting podcast %s: %s' % (p
.id, e
)
148 print 'finished %s podcasts' % count
149 print ' * %s unchanged' % p_unchanged
150 print ' * %s merged' % p_merged
151 print ' * %s updated' % p_updated
152 print ' * %s deleted' % p_deleted
153 print ' * %s error' % p_error
156 print ' * %s => %s: %s' % (r
.search
, r
.replace
, getattr(r
, 'hits', 0))
159 total
= Episode
.objects
.count()
160 episodes
= Episode
.objects
.only('id', 'url').iterator()
163 if (count
% 10000) == 0: print '% 3.2f%% (episode id %s)' % (((count
+ 0.0)/total
*100), e
.id)
165 su
= sanitize_url(e
.url
, rules
=rules
, podcast
=False, episode
=True)
166 except Exception, ex
:
167 log('failed to sanitize url for episode %s: %s' % (e
.id, ex
))
168 print 'failed to sanitize url for episode %s: %s' % (e
.id, ex
)
177 # invalid episode, remove
184 except Exception, ex
:
185 log('failed to delete episode %s: %s' % (e
.id, ex
))
186 print 'failed to delete episode %s: %s' % (e
.id, ex
)
192 su_episode
= Episode
.objects
.get(url
=su
, podcast
=e
.podcast
)
194 except Episode
.DoesNotExist
, ex
:
195 # "target" episode does not exist, we simply change the url
197 log('updating episode %s - "%s" => "%s"' % (e
.id, e
.url
, su
))
210 # last option - merge episodes
213 rewrite_episode_actions(e
, su_episode
)
214 rewrite_listeners(e
, su_episode
)
215 rewrite_chapters(e
, su_episode
)
216 rewrite_favorites(e
, su_episode
)
221 except Exception, ex
:
222 log('error rewriting episode %s: %s' % (e
.id, ex
))
223 print 'error rewriting episode %s: %s' % (e
.id, ex
)
228 print 'finished %s episodes' % count
229 print ' * %s unchanged' % e_unchanged
230 print ' * %s merged' % e_merged
231 print ' * %s updated' % e_updated
232 print ' * %s deleted' % e_deleted
233 print ' * %s error' % e_error
235 print 'finished %s podcasts' % count
236 print ' * %s unchanged' % p_unchanged
237 print ' * %s merged' % p_merged
238 print ' * %s updated' % p_updated
239 print ' * %s deleted' % p_deleted
240 print ' * %s error' % p_error
244 print ' * %s => %s: %s' % (r
.search
, r
.replace
, getattr(r
, 'hits', 0))
248 def delete_podcast(p
):
249 SubscriptionAction
.objects
.filter(podcast
=p
).delete()
250 BackendSubscription
.objects
.filter(podcast
=p
).delete()
254 def delete_episode(e
):
255 EpisodeAction
.objects
.filter(episode
=e
).delete()
256 Listener
.objects
.filter(episode
=e
).delete()
260 def rewrite_podcasts(p_old
, p_new
):
262 log('merging podcast %s "%s" to correct podcast %s "%s"' % (p_old
.id, p_old
.url
, p_new
.id, p_new
.url
))
264 # we simply delete incorrect toplist and suggestions entries,
265 # because we can't re-calculate them
266 ToplistEntry
.objects
.filter(podcast
=p_old
).delete()
267 SuggestionEntry
.objects
.filter(podcast
=p_old
).delete()
268 HistoricPodcastData
.objects
.filter(podcast
=p_old
).delete()
269 HistoricPodcastData
.objects
.filter(podcast
=p_new
).delete()
271 rewrite_episodes(p_old
, p_new
)
273 for sm
in SubscriptionMeta
.objects
.filter(podcast
=p_old
):
275 sm_new
= SubscriptionMeta
.objects
.get(user
=sm
.user
, podcast
=p_new
)
276 log('subscription meta %s (user %s, podcast %s) already exists, deleting %s (user %s, podcast %s)' % (sm_new
.id, sm
.user
.id, p_new
.id, sm
.id, sm
.user
.id, p_old
.id))
277 # meta-info already exist for the correct podcast, delete the other one
280 except SubscriptionMeta
.DoesNotExist
:
281 # meta-info for new podcast does not yet exist, update the old one
282 log('updating subscription meta %s (user %s, podcast %s => %s)' % (sm
.id, sm
.user
, p_old
.id, p_new
.id))
286 for sa
in SubscriptionAction
.objects
.filter(podcast
=p_old
):
288 log('updating subscription action %s (device %s, action %s, timestamp %s, podcast %s => %s)' % (sa
.id, sa
.device
.id, sa
.action
, sa
.timestamp
, sa
.podcast
.id, p_new
.id))
292 log('error updating subscription action %s: %s, deleting' % (sa
.id, e
))
295 for sub
in BackendSubscription
.objects
.filter(podcast
=p_old
):
297 log('updating subscription %s (device %s, user %s, since %s, podcast %s => %s)' % (sub
.id, sub
.device
.id, sub
.user
.id, sub
.subscribed_since
, p_old
.id, p_new
.id))
301 log('error updating subscription %s: %s, deleting' % (sub
.id, e
))
304 for tag
in PodcastTag
.objects
.filter(podcast
=p_old
):
306 log('updating tag %s (tag %s, source %s, podcast %s => %s)' % (tag
.id, tag
.tag
, tag
.source
, p_old
.id, p_new
.id))
310 log('error updating tag %s: %s, deleting.' % (tag
.id, e
))
314 def rewrite_episodes(p_old
, p_new
):
316 for e
in Episode
.objects
.filter(podcast
=p_old
):
318 e_new
, created_
= Episode
.objects
.get_or_create(podcast
=p_new
, url
=e
.url
)
320 log('episode %s (url %s, podcast %s) already exists; updating episode actions for episode %s (url %s, podcast %s)' % (e_new
.id, e
.url
, p_new
.id, e
.id, e
.url
, p_old
.id))
321 rewrite_episode_actions(e
, e_new
)
322 log('episode actions for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
323 rewrite_listeners(e
, e_new
)
324 log('listeners for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
325 rewrite_chapters(e
, e_new
)
326 log('chapters for episode %s (url "%s", podcast %s) updated.' % (e
.id, e
.url
, p_old
.id))
327 rewrite_favorites(e
, e_new
)
328 log('favorites for episode %s (url "%s", podcast %s) updated, deleting.' % (e
.id, e
.url
, p_old
.id))
331 except Episode
.DoesNotExist
:
332 log('updating episode %s (url "%s", podcast %s => %s)' % (e
.id, e
.url
, p_old
.id, p_new
.id))
337 def rewrite_episode_actions(e_old
, e_new
):
339 for ea
in EpisodeAction
.objects
.filter(episode
=e_old
):
341 log('updating episode action %s (user %s, timestamp %s, episode %s => %s)' % (ea
.id, ea
.user
.id, ea
.timestamp
, e_old
.id, e_new
.id))
346 log('error updating episode action %s: %s, deleting' % (sa
.id, e
))
350 def rewrite_listeners(e_old
, e_new
):
352 for l
in Listener
.objects
.filter(episode
=e_old
):
354 log('updating listener %s (user %s, device %s, podcast %s, episode %s => %s)' % (l
.id, l
.user
.id, l
.device
.id, l
.podcast
.id, e_old
.id, e_new
.id))
356 l
.podcast
= e_new
.podcast
360 log('error updating listener %s: %s, deleting' % (l
.id, e
))
364 def rewrite_chapters(e_old
, e_new
):
366 for c
in Chapter
.objects
.filter(episode
=e_old
):
368 log('updating chapter %s (user %s, device %s, episode %s => %s)' % (c
.id, c
.device
.id, e_old
.id, e_new
.id))
373 log('error updating chapter %s: %s, deleting' % (c
.id, e
))
377 def rewrite_favorites(e_old
, e_new
):
378 for f
in EpisodeFavorite
.objects
.filter(episode
=e_old
):
380 log('updating favorite %s (user %s, episode %s => %s)' % (f
.id, f
.user
.id, e_old
.id, e_new
.id))
385 log('error updating favorite %s: %s, deleting' % (f
.id, e
))
389 def precompile_rules(rules
=URLSanitizingRule
.objects
.all().order_by('priority')):
392 r
= re
.compile(rule
.search
, re
.UNICODE
)
393 rule
.search_precompile
= r
394 rules_p
.append( rule
)