# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import codecs
import csv
import os

import requests
import six

from six.moves.urllib.parse import urlparse

from mediagoblin.db.models import LocalUser
from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
    submit_media, get_upload_file_limits,
    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
from mediagoblin.tools.metadata import compact_and_validate
from mediagoblin.tools.translate import pass_to_ugettext as _
from jsonschema.exceptions import ValidationError


def parser_setup(subparser):
    subparser.description = """\
This command allows the administrator to upload many media files at once."""
    subparser.epilog = _(u"""For more information about how to properly run this
script (and how to format the metadata csv file), read the MediaGoblin
documentation page on command line uploading
<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
    subparser.add_argument(
        'username',
        help=_(u"Name of user these media entries belong to"))
    subparser.add_argument(
        'metadata_path',
        help=_(
            u"""Path to the csv file containing metadata information."""))
    subparser.add_argument(
        '--celery',
        action='store_true',
        help=_(u"Don't process eagerly, pass off to celery"))
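
# Example invocation of this subcommand (the username and path below are
# illustrative only). Without --celery the media is processed immediately;
# with --celery, processing is handed off to the celery task queue:
#
#     gmg batchaddmedia admin /home/admin/metadata.csv
#     gmg batchaddmedia --celery admin /home/admin/metadata.csv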

def batchaddmedia(args):
    # Run eagerly unless explicitly set not to
    if not args.celery:
        os.environ['CELERY_ALWAYS_EAGER'] = 'true'

    app = commands_util.setup_app(args)

    files_uploaded, files_attempted = 0, 0

    # get the user
    user = app.db.LocalUser.query.filter(
        LocalUser.username==args.username.lower()
    ).first()
    if user is None:
        print(_(u"Sorry, no user by username '{username}' exists".format(
            username=args.username)))
        return

    upload_limit, max_file_size = get_upload_file_limits(user)
    temp_files = []

    if os.path.isfile(args.metadata_path):
        metadata_path = args.metadata_path

    else:
        error = _(u'File at {path} not found, use -h flag for help'.format(
            path=args.metadata_path))
        print(error)
        return

    abs_metadata_filename = os.path.abspath(metadata_path)
    abs_metadata_dir = os.path.dirname(abs_metadata_filename)
    upload_limit, max_file_size = get_upload_file_limits(user)

    def maybe_unicodeify(some_string):
        # this is kinda terrible
        if some_string is None:
            return None
        else:
            return six.text_type(some_string)

    with codecs.open(
            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
        contents = all_metadata.read()
        media_metadata = parse_csv_file(contents)

    for media_id, file_metadata in media_metadata.items():
        files_attempted += 1
        # In case the metadata was not uploaded initialize an empty dictionary.
        json_ld_metadata = compact_and_validate({})

        # Get all metadata entries starting with 'media' as variables and then
        # delete them because those are for internal use only.
        original_location = file_metadata['location']

        ### Pull the important media information for mediagoblin from the
        ### metadata, if it is provided.
        title = file_metadata.get('title') or file_metadata.get('dc:title')
        description = (file_metadata.get('description') or
            file_metadata.get('dc:description'))

        license = file_metadata.get('license')
        try:
            json_ld_metadata = compact_and_validate(file_metadata)
        except ValidationError as exc:
            error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
                media_id=media_id,
                error_path=exc.path[0],
                error_msg=exc.message))
            print(error)
            continue

        url = urlparse(original_location)
        filename = url.path.split('/')[-1]

        if url.scheme == 'http':
            res = requests.get(url.geturl(), stream=True)
            media_file = res.raw

        elif url.scheme == '':
            path = url.path
            if os.path.isabs(path):
                file_abs_path = os.path.abspath(path)
            else:
                file_path = os.path.join(abs_metadata_dir, path)
                file_abs_path = os.path.abspath(file_path)
            try:
                # open in binary mode so the raw bytes are submitted
                media_file = open(file_abs_path, 'rb')
            except IOError:
                print(_(u"""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
                continue
        try:
            submit_media(
                mg_app=app,
                user=user,
                submitted_file=media_file,
                filename=filename,
                title=maybe_unicodeify(title),
                description=maybe_unicodeify(description),
                license=maybe_unicodeify(license),
                metadata=json_ld_metadata,
                tags_string=u"",
                upload_limit=upload_limit, max_file_size=max_file_size)
            print(_(u"""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
            files_uploaded += 1
        except FileUploadLimit:
            print(_(
                u"FAIL: This file is larger than the upload limits for this site."))
        except UserUploadLimit:
            print(_(
                "FAIL: This file will put this user past their upload limits."))
        except UserPastUploadLimit:
            print(_("FAIL: This user is already past their upload limits."))
    print(_(
        "{files_uploaded} out of {files_attempted} files successfully submitted".format(
            files_uploaded=files_uploaded,
            files_attempted=files_attempted)))


def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
    # TODO: this probably won't be necessary in Python 3
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [six.text_type(cell, 'utf-8') for cell in row]


def utf_8_encoder(unicode_csv_data):
    for line in unicode_csv_data:
        yield line.encode('utf-8')
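
# Rough usage sketch of the two helpers above (sample data is illustrative,
# Python 2 semantics): a unicode CSV line is encoded to UTF-8 for csv.reader
# and the resulting cells are decoded back to unicode:
#
#     >>> list(unicode_csv_reader([u'id,location', u'1,files/kitten.png']))
#     [[u'id', u'location'], [u'1', u'files/kitten.png']]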

def parse_csv_file(file_contents):
    """
    The helper function which converts the csv file into a dictionary where each
    item's key is the provided value 'id' and each item's value is another
    dictionary.
    """
    list_of_contents = file_contents.split('\n')
    key, lines = (list_of_contents[0].split(','),
                  list_of_contents[1:])
    objects_dict = {}

    # Build a dictionary
    for index, line in enumerate(lines):
        if line.isspace() or line == u'': continue
        values = next(unicode_csv_reader([line]))
        line_dict = dict([(key[i], val)
            for i, val in enumerate(values)])
        media_id = line_dict.get('id') or index
        objects_dict[media_id] = (line_dict)

    return objects_dict
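
# Illustrative sketch (sample values only, not taken from this file) of the
# CSV shape parse_csv_file() expects and what it returns.  The first row
# names the columns; batchaddmedia() later reads 'location', 'id',
# 'title'/'dc:title', 'description'/'dc:description' and 'license':
#
#     id,location,dc:title,dc:description,license
#     1,files/image01.png,First image,A sample image,http://creativecommons.org/licenses/by-sa/3.0/
#
# parse_csv_file(contents) would return roughly:
#
#     {u'1': {u'id': u'1', u'location': u'files/image01.png',
#             u'dc:title': u'First image', u'dc:description': u'A sample image',
#             u'license': u'http://creativecommons.org/licenses/by-sa/3.0/'}}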