# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import codecs
import csv
import os

import requests
import six

from six.moves.urllib.parse import urlparse

from mediagoblin.db.models import LocalUser
from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
    submit_media, get_upload_file_limits,
    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
from mediagoblin.tools.metadata import compact_and_validate
from mediagoblin.tools.translate import pass_to_ugettext as _
from jsonschema.exceptions import ValidationError


def parser_setup(subparser):
    subparser.description = """\
This command allows the administrator to upload many media files at once."""
    subparser.epilog = _(u"""For more information about how to properly run this
script (and how to format the metadata csv file), read the MediaGoblin
documentation page on command line uploading
<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
    subparser.add_argument(
        'username',
        help=_(u"Name of user these media entries belong to"))
    subparser.add_argument(
        'metadata_path',
        help=_(
            u"""Path to the csv file containing metadata information."""))
    subparser.add_argument(
        '--celery',
        action='store_true',
        help=_(u"Don't process eagerly, pass off to celery"))
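
# Example invocation of this subcommand (the username and path below are
# illustrative only). Without --celery the media is processed immediately;
# with --celery, processing is handed off to the celery task queue:
#
#     gmg batchaddmedia admin /home/admin/metadata.csv
#     gmg batchaddmedia --celery admin /home/admin/metadata.csv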

def batchaddmedia(args):
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username==args.username.lower()
    ).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
            username=args.username)))
        return

    upload_limit, max_file_size = get_upload_file_limits(user)
    temp_files = []

    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path

    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)
    upload_limit, max_file_size = get_upload_file_limits(user)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return six.text_type(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.items():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # Get all metadata entries starting with 'media' as variables and then
        # delete them because those are for internal use only.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
            file_metadata.get('dc:description'))

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        filename = url.path.split('/')[-1]

        if url.scheme == 'http':
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # open in binary mode so the raw bytes are submitted
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"",
                upload_limit=upload_limit, max_file_size=max_file_size)
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
                u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
                "FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
    print(_(
        "{files_uploaded} out of {files_attempted} files successfully submitted".format(
            files_uploaded=files_uploaded,
            files_attempted=files_attempted)))


def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
    # TODO: this probably won't be necessary in Python 3
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [six.text_type(cell, 'utf-8') for cell in row]


def utf_8_encoder(unicode_csv_data):
    for line in unicode_csv_data:
        yield line.encode('utf-8')
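
# Rough usage sketch of the two helpers above (sample data is illustrative,
# Python 2 semantics): a unicode CSV line is encoded to UTF-8 for csv.reader
# and the resulting cells are decoded back to unicode:
#
#     >>> list(unicode_csv_reader([u'id,location', u'1,files/kitten.png']))
#     [[u'id', u'location'], [u'1', u'files/kitten.png']]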

def parse_csv_file(file_contents):
    """
    The helper function which converts the csv file into a dictionary where each
    item's key is the provided value 'id' and each item's value is another
    dictionary.
    """
    list_of_contents = file_contents.split('\n')
    key, lines = (list_of_contents[0].split(','),
                  list_of_contents[1:])
    objects_dict = {}

    # Build a dictionary
    for index, line in enumerate(lines):
        if line.isspace() or line == u'': continue
        values = next(unicode_csv_reader([line]))
        line_dict = dict([(key[i], val)
            for i, val in enumerate(values)])
        media_id = line_dict.get('id') or index
        objects_dict[media_id] = (line_dict)

    return objects_dict
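
# Illustrative sketch (sample values only, not taken from this file) of the
# CSV shape parse_csv_file() expects and what it returns.  The first row
# names the columns; batchaddmedia() later reads 'location', 'id',
# 'title'/'dc:title', 'description'/'dc:description' and 'license':
#
#     id,location,dc:title,dc:description,license
#     1,files/image01.png,First image,A sample image,http://creativecommons.org/licenses/by-sa/3.0/
#
# parse_csv_file(contents) would return roughly:
#
#     {u'1': {u'id': u'1', u'location': u'files/image01.png',
#             u'dc:title': u'First image', u'dc:description': u'A sample image',
#             u'license': u'http://creativecommons.org/licenses/by-sa/3.0/'}}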