1 """
2 This module defines the BasePackageManager Class which provides an
3 implementation of the packaging system API providing methods to fetch,
4 upload and remove packages. Site specific extensions to any of these methods
5 should inherit this class.
6 """

import fcntl, logging, os, re, shutil
from autotest_lib.client.bin import os_dep
from autotest_lib.client.common_lib import error, utils, global_config


# the name of the checksum file that stores the packages' checksums
CHECKSUM_FILE = "packages.checksum"


def has_pbzip2():
    '''Check if parallel bzip2 is available on this system.'''
    try:
        os_dep.command('pbzip2')
    except ValueError:
        return False
    return True


# is parallel bzip2 available for use?
_PBZIP2_AVAILABLE = has_pbzip2()


def parse_ssh_path(repo):
    '''
    Parse ssh://xx@xx/path/to/ and return a tuple with host_line and
    remote path
    '''
    match = re.search('^ssh://(.*?)(/.*)$', repo)
    if match:
        return match.groups()
    else:
        raise error.PackageUploadError(
            "Incorrect SSH path in global_config: %s" % repo)


def repo_run_command(repo, cmd, ignore_status=False, cd=True):
    """Run a command relative to the repo's path."""
    repo = repo.strip()
    run_cmd = None
    cd_str = ''
    if repo.startswith('ssh://'):
        username = None
        hostline, remote_path = parse_ssh_path(repo)
        if cd:
            cd_str = 'cd %s && ' % remote_path
        if '@' in hostline:
            username, host = hostline.split('@')
            run_cmd = 'ssh %s@%s "%s%s"' % (username, host, cd_str, cmd)
        else:
            # no username in the repo spec, use the host line as-is
            run_cmd = 'ssh %s "%s%s"' % (hostline, cd_str, cmd)
    else:
        if cd:
            cd_str = 'cd %s && ' % repo
        run_cmd = "%s%s" % (cd_str, cmd)

    if run_cmd:
        return utils.run(run_cmd, ignore_status=ignore_status)


def create_directory(repo):
    _, remote_path = parse_ssh_path(repo)
    repo_run_command(repo, 'mkdir -p %s' % remote_path, cd=False)


def check_diskspace(repo, min_free=None):
    # Note: 1 GB = 10**9 bytes (SI unit).
    if min_free is None:
        min_free = global_config.global_config.get_config_value('PACKAGES',
                                                                'minimum_free_space',
                                                                type=int,
                                                                default=1)
    try:
        df = repo_run_command(repo,
                              'df -PB %d . | tail -1' % 10 ** 9).stdout.split()
        free_space_gb = int(df[3])
    except Exception, e:
        raise error.RepoUnknownError('Unknown Repo Error: %s' % e)
    if free_space_gb < min_free:
        raise error.RepoDiskFullError('Not enough disk space available '
                                      '%sg < %sg' % (free_space_gb, min_free))


def check_write(repo):
    try:
        repo_testfile = '.repo_test_file'
        repo_run_command(repo, 'touch %s' % repo_testfile).stdout.strip()
        repo_run_command(repo, 'rm ' + repo_testfile)
    except error.CmdError:
        raise error.RepoWriteError('Unable to write to ' + repo)


def trim_custom_directories(repo, older_than_days=None):
    if not repo:
        return

    if older_than_days is None:
        older_than_days = global_config.global_config.get_config_value(
            'PACKAGES', 'custom_max_age', type=int, default=40)
    cmd = 'find . -type f -atime +%s -exec rm -f {} \;' % older_than_days
    repo_run_command(repo, cmd, ignore_status=True)


class RepositoryFetcher(object):
    url = None


    def fetch_pkg_file(self, filename, dest_path):
        """ Fetch a package file from a package repository.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to download the file to.

        @raises PackageFetchError if the fetch failed
        """
        raise NotImplementedError()


class HttpFetcher(RepositoryFetcher):
    wget_cmd_pattern = 'wget --connect-timeout=15 -nv %s -O %s'


    def __init__(self, package_manager, repository_url):
        """
        @param repository_url: The base URL of the http repository
        """
        self.run_command = package_manager._run_command
        self.url = repository_url


    def _quick_http_test(self):
        """ Run a simple 30 second wget on the repository to see if it is
        reachable. This avoids the need to wait for a full 10min timeout.
        """
        # just make a temp file to write a test fetch into
        mktemp = 'mktemp -u /tmp/tmp.XXXXXX'
        dest_file_path = self.run_command(mktemp).stdout.strip()

        try:
            # build up a wget command
            http_cmd = self.wget_cmd_pattern % (self.url, dest_file_path)
            try:
                self.run_command(http_cmd, _run_command_dargs={'timeout': 30})
            except Exception, e:
                msg = 'HTTP test failed, unable to contact %s: %s'
                raise error.PackageFetchError(msg % (self.url, e))
        finally:
            self.run_command('rm -rf %s' % dest_file_path)


    def fetch_pkg_file(self, filename, dest_path):
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)

        # do a quick test to verify the repo is reachable
        self._quick_http_test()

        # try to retrieve the package via http
        package_url = os.path.join(self.url, filename)
        try:
            cmd = self.wget_cmd_pattern % (package_url, dest_path)
            result = self.run_command(cmd)

            file_exists = self.run_command(
                'ls %s' % dest_path,
                _run_command_dargs={'ignore_status': True}).exit_status == 0
            if not file_exists:
                logging.error('wget failed: %s', result)
                raise error.CmdError(cmd, result)

            logging.debug('Successfully fetched %s from %s', filename,
                          package_url)
        except error.CmdError:
            # remove whatever junk was retrieved when the get failed
            self.run_command('rm -f %s' % dest_path)

            raise error.PackageFetchError('%s not found in %s' % (filename,
                                                                  package_url))


class LocalFilesystemFetcher(RepositoryFetcher):
    def __init__(self, package_manager, local_dir):
        self.run_command = package_manager._run_command
        self.url = local_dir


    def fetch_pkg_file(self, filename, dest_path):
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)
        local_path = os.path.join(self.url, filename)
        try:
            self.run_command('cp %s %s' % (local_path, dest_path))
            logging.debug('Successfully fetched %s from %s', filename,
                          local_path)
        except error.CmdError, e:
            raise error.PackageFetchError(
                'Package %s could not be fetched from %s'
                % (filename, self.url), e)


class BasePackageManager(object):
    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
                 upload_paths=None, do_locking=True, run_function=utils.run,
                 run_function_args=[], run_function_dargs={}):
        """
        repo_urls: The list of repository urls that is consulted
                   whilst fetching the package
        upload_paths: The list of repositories to which the package
                      is uploaded
        pkgmgr_dir : A directory that can be used by the package manager
                     to dump stuff (like checksum files of the repositories
                     etc.).
        do_locking : Enable locking when the packages are installed.

        run_function is used to execute the commands throughout this file.
        It defaults to utils.run() but a custom method (if provided) should
        be of the same schema as utils.run. It should return a CmdResult
        object and throw a CmdError exception. The reason for using a separate
        function to run the commands is that the same code can be run to fetch
        a package on the local machine or on a remote machine (in which case
        ssh_host's run function is passed in for run_function).
        """
        # In-memory dictionary that stores the checksums of packages
        self._checksum_dict = {}

        self.pkgmgr_dir = pkgmgr_dir
        self.do_locking = do_locking
        self.hostname = hostname
        self.repositories = []

        # Create an internal function that is a simple wrapper of
        # run_function and takes in the args and dargs as arguments
        def _run_command(command, _run_command_args=run_function_args,
                         _run_command_dargs={}):
            """
            Special internal function that takes in a command as
            argument and passes it on to run_function (if specified).
            The _run_command_dargs are merged into run_function_dargs
            with the former having more precedence than the latter.
            """
            new_dargs = dict(run_function_dargs)
            new_dargs.update(_run_command_dargs)
            # avoid polluting logs with extremely verbose packaging output
            new_dargs.update({'stdout_tee' : None})

            return run_function(command, *_run_command_args,
                                **new_dargs)

        self._run_command = _run_command

        # Process the repository URLs
        if not repo_urls:
            repo_urls = []
        elif hostname:
            repo_urls = self.get_mirror_list(repo_urls)
        for url in repo_urls:
            self.add_repository(url)

        # Process the upload URLs
        if not upload_paths:
            self.upload_paths = []
        else:
            self.upload_paths = list(upload_paths)


    def add_repository(self, repo):
        if isinstance(repo, basestring):
            self.repositories.append(self.get_fetcher(repo))
        elif isinstance(repo, RepositoryFetcher):
            self.repositories.append(repo)
        else:
            raise TypeError("repo must be RepositoryFetcher or url string")


    def get_fetcher(self, url):
        if url.startswith('http://'):
            return HttpFetcher(self, url)
        else:
            return LocalFilesystemFetcher(self, url)
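
    # For example (illustrative values):
    #   get_fetcher('http://pkgrepo.example.com/packages') -> HttpFetcher
    #   get_fetcher('/usr/local/autotest/packages')        -> LocalFilesystemFetcher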


    def repo_check(self, repo):
        """
        Check to make sure the repo is in a sane state:
        ensure we have at least the configured minimum amount of free space
        and make sure we can write to the repo.
        """
        if not repo.startswith('/') and not repo.startswith('ssh:'):
            return
        try:
            create_directory(repo)
            check_diskspace(repo)
            check_write(repo)
        except (error.RepoWriteError, error.RepoUnknownError,
                error.RepoDiskFullError), e:
            raise error.RepoError("ERROR: Repo %s: %s" % (repo, e))


    def upkeep(self, custom_repos=None):
        """
        Clean up custom upload/download areas
        """
        from autotest_lib.server import subcommand
        if not custom_repos:
            # Not all package types necessarily require or allow custom repos
            try:
                custom_repos = global_config.global_config.get_config_value(
                    'PACKAGES', 'custom_upload_location').split(',')
            except global_config.ConfigError:
                custom_repos = []

            try:
                custom_download = global_config.global_config.get_config_value(
                    'PACKAGES', 'custom_download_location')
                custom_repos += [custom_download]
            except global_config.ConfigError:
                pass

            if not custom_repos:
                return

        subcommand.parallel_simple(trim_custom_directories, custom_repos,
                                   log=False)


    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
                    preserve_install_dir=False, repo_url=None):
        """
        Remove install_dir if it already exists and then recreate it unless
        preserve_install_dir is specified as True.
        Fetch the package into fetch_dir and untar it into install_dir.
        The assumption is that packages are of the form:
        <pkg_type>-<pkg_name>.tar.bz2
        name : name of the package
        pkg_type : type of the package
        fetch_dir : the directory into which the package tarball is fetched
        install_dir : the directory where the package files will be untarred to
        repo_url : the url of the repository to fetch the package from.
        """
        # The do_locking flag is on by default unless you disable it
        # (typically when packages are installed directly from the server
        # onto the client, in which case the fcntl locking won't work since
        # the code runs on the server).
        if self.do_locking:
            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')

        try:
            if self.do_locking:
                fcntl.flock(lockfile, fcntl.LOCK_EX)

            self._run_command('mkdir -p %s' % fetch_dir)

            pkg_name = self.get_tarball_name(name, pkg_type)
            fetch_path = os.path.join(fetch_dir, pkg_name)
            try:
                # Fetch the package into fetch_dir
                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)

                # check to see if the install_dir exists and if it does
                # then check to see if the .checksum file is the latest
                install_dir_exists = False
                try:
                    self._run_command("ls %s" % install_dir)
                    install_dir_exists = True
                except (error.CmdError, error.AutoservRunError):
                    pass

                if (install_dir_exists and
                    not self.untar_required(fetch_path, install_dir)):
                    return

                # untar the package into install_dir and
                # update the checksum in that directory
                if not preserve_install_dir:
                    # Make sure we clean up the install_dir
                    self._run_command('rm -rf %s' % install_dir)
                self._run_command('mkdir -p %s' % install_dir)

                self.untar_pkg(fetch_path, install_dir)

            except error.PackageFetchError, why:
                raise error.PackageInstallError(
                    'Installation of %s(type:%s) failed : %s'
                    % (name, pkg_type, why))
        finally:
            if self.do_locking:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()


    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
        """
        Fetch the package into dest_path from repo_url. By default repo_url
        is None and the package is looked for in all the repositories
        specified. Otherwise it is fetched from the specific repo_url.
        pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
                                        dep-gcc.tar.bz2, kernel.1-1.rpm)
        repo_url : the URL of the repository where the package is located.
        dest_path : complete path of where the package will be fetched to.
        use_checksum : This is set to False to fetch the packages.checksum file
                       so that the checksum comparison is bypassed for the
                       checksum file itself. This is used internally by the
                       packaging system. It should be ignored by external
                       callers of this method who use it to fetch custom
                       packages.
        """
        try:
            self._run_command("ls %s" % os.path.dirname(dest_path))
        except (error.CmdError, error.AutoservRunError):
            raise error.PackageFetchError("Please provide a valid "
                                          "destination: %s " % dest_path)

        # See if the package was already fetched earlier; if so
        # the checksums need to be compared and the package is
        # fetched again only if they differ.
        pkg_exists = False
        try:
            self._run_command("ls %s" % dest_path)
            pkg_exists = True
        except (error.CmdError, error.AutoservRunError):
            pass

        # if a repository location is explicitly provided, fetch the package
        # from there and return
        if repo_url:
            repositories = [self.get_fetcher(repo_url)]
        elif self.repositories:
            repositories = self.repositories
        else:
            raise error.PackageFetchError("No repository urls specified")

        # install the package from the package repos, try the repos in
        # reverse order, assuming that the 'newest' repos are most desirable
        for fetcher in reversed(repositories):
            try:
                # Fetch the package if it is not there, the checksum does
                # not match, or checksums are disabled entirely
                need_to_fetch = (
                    not use_checksum or not pkg_exists
                    or not self.compare_checksum(dest_path, fetcher.url))
                if need_to_fetch:
                    fetcher.fetch_pkg_file(pkg_name, dest_path)
                return
            except (error.PackageFetchError, error.AutoservRunError):
                # The package could not be found in this repo, continue looking
                logging.debug('%s could not be fetched from %s', pkg_name,
                              fetcher.url)

        repo_url_list = [repo.url for repo in repositories]
        message = ('%s could not be fetched from any of the repos %s' %
                   (pkg_name, repo_url_list))
        logging.error(message)
        # if we got here then that means the package is not found
        # in any of the repositories.
        raise error.PackageFetchError(message)
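
    # For example (illustrative values), a prebuilt test package can be
    # fetched into a temporary directory with:
    #   pkgmgr.fetch_pkg('test-sleeptest.tar.bz2',
    #                    '/tmp/test-sleeptest.tar.bz2', use_checksum=True)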


    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False,
                   timeout=300):
        from autotest_lib.server import subcommand
        if upload_path:
            upload_path_list = [upload_path]
            self.upkeep(upload_path_list)
        elif len(self.upload_paths) > 0:
            self.upkeep()
            upload_path_list = self.upload_paths
        else:
            raise error.PackageUploadError("Invalid Upload Path specified")

        if update_checksum:
            # get the packages' checksum file and update it with the current
            # package's checksum
            self.update_checksum(pkg_path)

        commands = []
        for path in upload_path_list:
            commands.append(subcommand.subcommand(self.upload_pkg_parallel,
                                                  (pkg_path, path,
                                                   update_checksum)))

        results = subcommand.parallel(commands, timeout, return_results=True)
        for result in results:
            if result:
                print str(result)


    # TODO(aganti): Fix the bug with the current checksum logic where
    # packages' checksums that are not present consistently in all the
    # repositories are not handled properly. This is a corner case, though;
    # the ideal solution is to make the checksum file repository specific
    # and then maintain it.
    def upload_pkg_parallel(self, pkg_path, upload_path, update_checksum=False):
        """
        Uploads to a specified upload_path or to all the repos.
        Also uploads the checksum file to all the repos.
        pkg_path : The complete path to the package file
        upload_path : the absolute path where the files are copied to.
                      if set to 'None' assumes 'all' repos
        update_checksum : If set to False (the default), the checksum file is
                          not updated. This is necessary for custom packages
                          (like custom kernels and custom tests) that get
                          uploaded but do not need to be part of the checksum
                          file and bloat it.
        """
        self.repo_check(upload_path)
        # upload the package
        if os.path.isdir(pkg_path):
            self.upload_pkg_dir(pkg_path, upload_path)
        else:
            self.upload_pkg_file(pkg_path, upload_path)
        if update_checksum:
            self.upload_pkg_file(self._get_checksum_file_path(),
                                 upload_path)


    def upload_pkg_file(self, file_path, upload_path):
        """
        Upload a single file. Depending on the upload path, the appropriate
        method for that protocol is called. Currently this simply copies the
        file to the target directory (but can be extended for other protocols)
        This assumes that the web server is running on the same machine where
        the method is being called from. The upload_path's files are
        basically served by that web server.
        """
        try:
            if upload_path.startswith('ssh://'):
                # parse ssh://user@host/usr/local/autotest/packages
                hostline, remote_path = parse_ssh_path(upload_path)
                try:
                    utils.run('scp %s %s:%s' % (file_path, hostline,
                                                remote_path))
                    r_path = os.path.join(remote_path,
                                          os.path.basename(file_path))
                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, r_path))
                except error.CmdError:
                    logging.error("Error uploading to repository %s",
                                  upload_path)
            else:
                shutil.copy(file_path, upload_path)
                os.chmod(os.path.join(upload_path,
                                      os.path.basename(file_path)), 0644)
        except (IOError, os.error), why:
            logging.error("Upload of %s to %s failed: %s", file_path,
                          upload_path, why)


    def upload_pkg_dir(self, dir_path, upload_path):
        """
        Upload a full directory. Depending on the upload path, the appropriate
        method for that protocol is called. Currently this copies the whole
        tmp package directory to the target directory.
        This assumes that the web server is running on the same machine where
        the method is being called from. The upload_path's files are
        basically served by that web server.
        """
        local_path = os.path.join(dir_path, "*")
        try:
            if upload_path.startswith('ssh://'):
                hostline, remote_path = parse_ssh_path(upload_path)
                try:
                    utils.run('scp %s %s:%s' % (local_path, hostline,
                                                remote_path))
                    ssh_path = os.path.join(remote_path, "*")
                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
                except error.CmdError:
                    logging.error("Error uploading to repository: %s",
                                  upload_path)
            else:
                utils.run("cp %s %s " % (local_path, upload_path))
                up_path = os.path.join(upload_path, "*")
                utils.run("chmod 644 %s" % up_path)
        except (IOError, os.error), why:
            raise error.PackageUploadError("Upload of %s to %s failed: %s"
                                           % (dir_path, upload_path, why))


    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
        """
        Remove the package from the specified remove_path
        pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
                                        dep-gcc.tar.bz2)
        remove_path : the location to remove the package from.
        """
        if remove_path:
            remove_path_list = [remove_path]
        elif len(self.upload_paths) > 0:
            remove_path_list = self.upload_paths
        else:
            raise error.PackageRemoveError(
                "Invalid path to remove the pkg from")

        checksum_path = self._get_checksum_file_path()

        if remove_checksum:
            self.remove_checksum(pkg_name)

        # remove the package and upload the checksum file to the repos
        for path in remove_path_list:
            self.remove_pkg_file(pkg_name, path)
            self.upload_pkg_file(checksum_path, path)


    def remove_pkg_file(self, filename, pkg_dir):
        """
        Remove the file named filename from pkg_dir
        """
        try:
            # Remove the file
            if pkg_dir.startswith('ssh://'):
                hostline, remote_path = parse_ssh_path(pkg_dir)
                path = os.path.join(remote_path, filename)
                utils.run("ssh %s 'rm -rf %s'" % (hostline, path))
            else:
                os.remove(os.path.join(pkg_dir, filename))
        except (IOError, os.error), why:
            raise error.PackageRemoveError("Could not remove %s from %s: %s "
                                           % (filename, pkg_dir, why))


    def get_mirror_list(self, repo_urls):
        """
        Stub function for site specific mirrors.

        Returns:
            Priority ordered list
        """
        return repo_urls


    def _get_checksum_file_path(self):
        """
        Return the complete path of the checksum file (assumed to be stored
        in self.pkgmgr_dir).
        """
        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)


    def _get_checksum_dict(self):
        """
        Fetch the checksum file if not already fetched. If the checksum file
        cannot be fetched from the repos then a new file is created with
        the current package's (specified in pkg_path) checksum value in it.
        Populate the local checksum dictionary with the values read from
        the checksum file.
        The checksum file is assumed to be present in self.pkgmgr_dir
        """
        checksum_path = self._get_checksum_file_path()
        if not self._checksum_dict:
            # Fetch the checksum file
            try:
                try:
                    self._run_command("ls %s" % checksum_path)
                except (error.CmdError, error.AutoservRunError):
                    # The packages checksum file does not exist locally.
                    # See if it is present in the repositories.
                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
            except error.PackageFetchError:
                # This should not happen whilst fetching a package: if a
                # package is present in the repository, the corresponding
                # checksum file should also be automatically present. This
                # case happens only when a package is being uploaded and it
                # is the first package to be uploaded to the repos (hence no
                # checksum file has been created yet).
                # Return an empty dictionary in that case.
                return {}

            # Read the checksum file into memory
            checksum_file_contents = self._run_command('cat '
                                                       + checksum_path).stdout

            # Return {} if we have an empty checksum file present
            if not checksum_file_contents.strip():
                return {}

            # Parse the checksum file contents into self._checksum_dict
            for line in checksum_file_contents.splitlines():
                checksum, package_name = line.split(None, 1)
                self._checksum_dict[package_name] = checksum

        return self._checksum_dict


    def _save_checksum_dict(self, checksum_dict):
        """
        Save the checksum dictionary onto the checksum file. Update the
        local _checksum_dict variable with this new set of values.
        checksum_dict : New checksum dictionary
        """
        checksum_path = self._get_checksum_file_path()
        self._checksum_dict = checksum_dict.copy()
        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
                                      for pkg_name, checksum in
                                      checksum_dict.iteritems())
        # Write the checksum file back to disk
        self._run_command('echo "%s" > %s' % (checksum_contents,
                                              checksum_path),
                          _run_command_dargs={'verbose': False})


    def compute_checksum(self, pkg_path):
        """
        Compute the MD5 checksum for the package file and return it.
        pkg_path : The complete path for the package file
        """
        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
        return md5sum_output.split()[0]


    def update_checksum(self, pkg_path):
        """
        Update the checksum of the package in the packages' checksum
        file. This method is called whenever a package is fetched just
        to be sure that the checksums in the local file are the latest.
        pkg_path : The complete path to the package file.
        """
        # Compute the new checksum
        new_checksum = self.compute_checksum(pkg_path)
        checksum_dict = self._get_checksum_dict()
        checksum_dict[os.path.basename(pkg_path)] = new_checksum
        self._save_checksum_dict(checksum_dict)


    def remove_checksum(self, pkg_name):
        """
        Remove the checksum of the package from the packages checksum file.
        This method is called whenever a package is removed from the
        repositories in order to clean up its corresponding checksum.
        pkg_name : The name of the package to be removed
        """
        checksum_dict = self._get_checksum_dict()
        if pkg_name in checksum_dict:
            del checksum_dict[pkg_name]
        self._save_checksum_dict(checksum_dict)


    def compare_checksum(self, pkg_path, repo_url):
        """
        Calculate the checksum of the file specified in pkg_path and
        compare it with the checksum in the checksum file.
        Return True if both match, else return False.
        pkg_path : The full path to the package file for which the
                   checksum is being compared
        repo_url : The URL to fetch the checksum from
        """
        checksum_dict = self._get_checksum_dict()
        package_name = os.path.basename(pkg_path)
        if not checksum_dict or package_name not in checksum_dict:
            return False

        repository_checksum = checksum_dict[package_name]
        local_checksum = self.compute_checksum(pkg_path)
        return (local_checksum == repository_checksum)


    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
        """
        Create a tar.bz2 file with the name 'pkg_name', e.g. test-blah.tar.bz2.
        Excludes the directories specified in exclude_string while tarring
        the source. Returns the tarball path.
        """
        tarball_path = os.path.join(dest_dir, pkg_name)
        temp_path = tarball_path + '.tmp'
        cmd_list = ['tar', '-cf', temp_path, '-C', src_dir]
        if _PBZIP2_AVAILABLE:
            cmd_list.append('--use-compress-prog=pbzip2')
        else:
            cmd_list.append('-j')
        if exclude_string is not None:
            cmd_list.append(exclude_string)

        try:
            utils.system(' '.join(cmd_list))
        except:
            os.unlink(temp_path)
            raise

        os.rename(temp_path, tarball_path)
        return tarball_path
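
    # The assembled command looks roughly like the following (illustrative
    # values only; the trailing exclude/file-list portion comes from the
    # caller's exclude_string):
    #   tar -cf /dest/test-blah.tar.bz2.tmp -C /src \
    #       --use-compress-prog=pbzip2 --exclude=.svn .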


    def untar_required(self, tarball_path, dest_dir):
        """
        Compare the checksum of the tarball_path with the .checksum file
        in the dest_dir and return False if it matches. The untar
        of the package happens only if the checksums do not match.
        """
        checksum_path = os.path.join(dest_dir, '.checksum')
        try:
            existing_checksum = self._run_command('cat ' + checksum_path).stdout
        except (error.CmdError, error.AutoservRunError):
            # If the .checksum file is not present (generally, this should
            # not be the case) then return True so that the untar happens
            return True

        new_checksum = self.compute_checksum(tarball_path)
        return (new_checksum.strip() != existing_checksum.strip())


    def untar_pkg(self, tarball_path, dest_dir):
        """
        Untar the package present in the tarball_path and put a
        ".checksum" file in the dest_dir containing the checksum
        of the tarball. This method assumes that the package to be
        untarred is of the form <name>.tar.bz2
        """
        self._run_command('tar xjf %s -C %s' % (tarball_path, dest_dir))
        # Put the .checksum file in the install_dir to note
        # where the package came from
        pkg_checksum = self.compute_checksum(tarball_path)
        pkg_checksum_path = os.path.join(dest_dir,
                                         '.checksum')
        self._run_command('echo "%s" > %s '
                          % (pkg_checksum, pkg_checksum_path))


    @staticmethod
    def get_tarball_name(name, pkg_type):
        """Converts a package name and type into a tarball name.

        @param name: The name of the package
        @param pkg_type: The type of the package

        @returns A tarball filename for that specific type of package
        """
        assert '-' not in pkg_type
        return '%s-%s.tar.bz2' % (pkg_type, name)


    @staticmethod
    def parse_tarball_name(tarball_name):
        """Converts a package tarball name into a package name and type.

        @param tarball_name: The filename of the tarball

        @returns (name, pkg_type) where name is the package name and pkg_type
            is the package type.
        """
        match = re.search(r'^([^-]*)-(.*)\.tar\.bz2$', tarball_name)
        pkg_type, name = match.groups()
        return name, pkg_type
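
    # For example, the two helpers round-trip:
    #   get_tarball_name('sleeptest', 'test')        -> 'test-sleeptest.tar.bz2'
    #   parse_tarball_name('test-sleeptest.tar.bz2') -> ('sleeptest', 'test')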


    def is_url(self, url):
        """Return true if path looks like a URL"""
        return url.startswith('http://')


    def get_package_name(self, url, pkg_type):
        """
        Extract the group and test name for the url. This method is currently
        used only for tests.
        """
        if pkg_type == 'test':
            regex = '[^:]+://(.*)/([^/]*)$'
            return self._get_package_name(url, regex)
        else:
            return ('', url)


    def _get_package_name(self, url, regex):
        if not self.is_url(url):
            if url.endswith('.tar.bz2'):
                testname = url.replace('.tar.bz2', '')
                testname = re.sub(r'(\d*)\.', '', testname)
                return (testname, testname)
            else:
                return ('', url)

        match = re.match(regex, url)
        if not match:
            return ('', url)
        group, filename = match.groups()
        # Generate the group prefix.
        group = re.sub(r'\W', '_', group)
        # Drop the extension to get the raw test name.
        testname = re.sub(r'\.tar\.bz2', '', filename)
        # Drop any random numbers at the end of the test name if any
        testname = re.sub(r'\.(\d*)', '', testname)
        return (group, testname)
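
    # For example (illustrative URL):
    #   get_package_name('http://pkgrepo.example.com/tests/sleeptest.tar.bz2',
    #                    'test')
    #   returns ('pkgrepo_example_com_tests', 'sleeptest')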