refactor dump-sample, add --podcast parameter
[mygpo.git] / mygpo / db / couchdb / management / commands / dump-sample.py
blob261dd609dfb68f94c7f171b7854306f7f539d6f3
1 from base64 import b64decode
2 from optparse import make_option
3 import sys
5 from couchdb.multipart import write_multipart
7 from django.core.management.base import BaseCommand
9 from mygpo.core.models import Podcast
10 from mygpo.couch import get_main_database
11 from mygpo.directory.toplist import PodcastToplist
12 from mygpo.users.models import User
13 from mygpo.utils import progress
14 from mygpo.json import json
15 from mygpo.db.couchdb.episode import episodes_for_podcast
16 from mygpo.db.couchdb.podcast import podcast_by_id, podcast_for_url
17 from mygpo.db.couchdb.podcast_state import podcast_states_for_user
18 from mygpo.db.couchdb.episode_state import episode_state_for_user_episode
19 from mygpo.db.couchdb.user import suggestions_for_user
20 from mygpo.db.couchdb.directory import category_for_tag
class Command(BaseCommand):
    """
    Dumps a sample of the whole database that can be used for
    testing/development. All objects that are (indirectly) referenced
    by the users specified by --user args are dumped, together with the
    toplist podcasts (--toplist) and the podcasts given by --podcast.

    The dump is similar to a dump of couchdb-python's couchdb-dump and
    can be imported by its couchdb-load. The dump itself is written to
    stdout; progress and diagnostic messages are written to stderr so
    that they do not end up inside the dump.
    """

    option_list = BaseCommand.option_list + (
        make_option('--user', action='append', type="string", dest='users', default=[],
            help="User for which related data should be dumped"),
        make_option('--toplist', action='store_true', dest='toplist',
            help="Dump toplist podcasts"),
        make_option('--podcast', action='append', type="string", dest='podcasts', default=[],
            help="Feed-URLs of podcasts to dump"),
    )

    def handle(self, *args, **options):
        """
        Collects the ids of all documents selected by the command line
        options and writes the corresponding documents to stdout.

        Users and podcasts that cannot be found are reported on stderr
        and skipped.
        """

        # ids of all documents that should be part of the dump
        docs = set()

        # "or []" also covers an option that is present but set to None
        for username in options.get('users') or []:
            user = User.get_user(username)
            if user is None:
                sys.stderr.write('user not found for username %s\n' % username)
                continue

            self.add_user(user, docs)

        if options.get('toplist', False):
            toplist = PodcastToplist()
            entries = toplist[:25]

            # each entry is a pair of which only the podcast is needed
            for n, podcast in entries:
                self.add_podcast(podcast, docs)

        for podcast_url in options.get('podcasts') or []:
            # NOTE(review): ``docs`` is passed as second positional argument;
            # the signature of podcast_for_url is not visible from this
            # file -- confirm that this is intended.
            podcast = podcast_for_url(podcast_url, docs)
            if not podcast:
                # must not go to stdout, which receives the dump
                sys.stderr.write('podcast not found for URL %s\n' % podcast_url)
                continue

            self.add_podcast(podcast, docs)

        db = get_main_database()
        docs = sorted(docs)
        self.dump(docs, db)

    def add_user(self, user, docs):
        """
        Adds the ids of the user, its suggestions and everything that is
        reachable through its podcast states to the set ``docs``.
        """

        # User
        docs.add(user._id)

        # Suggestions
        suggestions = suggestions_for_user(user)
        docs.add(suggestions._id)

        # Podcast States
        for p_state in podcast_states_for_user(user):
            self.add_podcast_state(p_state, user, docs)

    def add_podcast_state(self, p_state, user, docs):
        """
        Adds the id of the podcast state to ``docs``, together with the
        categories of its tags, its podcast and the states that ``user``
        has for the episodes of that podcast.
        """
        docs.add(p_state._id)

        # Categories
        for tag in p_state.tags:
            c = category_for_tag(tag)
            if c:
                docs.add(c._id)

        # Podcast
        podcast = podcast_by_id(p_state.podcast)
        self.add_podcast(podcast, docs)

        # Episodes
        for episode in episodes_for_podcast(podcast):
            e_state = episode_state_for_user_episode(user, episode)
            self.add_episode_state(e_state, docs)

    def add_podcast(self, podcast, docs):
        """
        Adds the id of the podcast (or podcast group) to ``docs``,
        together with the categories of its tags and its episodes.
        """
        docs.add(podcast._id)

        # if podcast is actually a PodcastGroup, we get the first podcast
        podcast = podcast.get_podcast()

        # Categories; podcast.tags maps a key to a list of tags
        for s in podcast.tags:
            for tag in podcast.tags[s]:
                c = category_for_tag(tag)
                if c:
                    docs.add(c._id)

        # Episodes
        for episode in episodes_for_podcast(podcast.get_podcast()):
            self.add_episode(episode, docs)

    def add_episode(self, episode, docs):
        """ Adds the id of the episode to ``docs`` """
        docs.add(episode._id)

    def add_episode_state(self, e_state, docs):
        """ Adds the id of the episode state to ``docs`` if it has one """
        if e_state._id:
            docs.add(e_state._id)

    def dump(self, docs, db):
        """
        Writes the documents with the given ids as one multipart
        document to stdout.

        ``docs`` is an iterable of document ids (empty ids are skipped),
        ``db`` is the database from which the documents are fetched.
        Progress is reported on stderr.
        """

        output = sys.stdout
        boundary = None
        envelope = write_multipart(output, boundary=boundary)
        total = len(docs)

        for n, docid in enumerate(docs):

            if not docid:
                continue

            doc = db.get(docid, attachments=True)
            attachments = doc.pop('_attachments', {})
            jsondoc = json.dumps(doc)

            if attachments:
                # documents with attachments get a nested multipart part:
                # first the JSON document, then one part per attachment
                parts = envelope.open({
                    'Content-ID': doc['_id'],
                    'ETag': '"%s"' % doc['_rev']
                })
                parts.add('application/json', jsondoc)

                for name, info in attachments.items():
                    content_type = info.get('content_type')
                    if content_type is None: # CouchDB < 0.8
                        content_type = info.get('content-type')
                    parts.add(content_type, b64decode(info['data']), {
                        'Content-ID': name
                    })
                parts.close()

            else:
                envelope.add('application/json', jsondoc, {
                    'Content-ID': doc['_id'],
                    'ETag': '"%s"' % doc['_rev']
                })

            progress(n+1, total, docid, stream=sys.stderr)

        envelope.close()