cli/job.py

   1 #
   2 # Copyright 2008 Google Inc. All Rights Reserved.
   3
   4 """
   5 The job module contains the objects and methods used to
   6 manage jobs in Autotest.
   7
   8 The valid actions are:
   9 list:    lists job(s)
  10 create:  create a job
  11 abort:   abort job(s)
  12 stat:    detailed listing of job(s)
  13
  14 The common options are:
  15
  16 See topic_common.py for a High Level Design and Algorithm.
  17 """
  18
  19 import getpass, os, pwd, re, socket, sys
  20 from autotest_lib.cli import topic_common, action_common
  21
  22
  23 class job(topic_common.atest):
  24     """Job class
  25     atest job [create|clone|list|stat|abort] <options>"""
  26     usage_action = '[create|clone|list|stat|abort]'
  27     topic = msg_topic = 'job'
  28     msg_items = '<job_ids>'
  29
  30
  31     def _convert_status(self, results):
  32         for result in results:
  33             total = sum(result['status_counts'].values())
  34             status = ['%s=%s(%.1f%%)' % (key, val, 100.0*float(val)/total)
  35                       for key, val in result['status_counts'].iteritems()]
  36             status.sort()
  37             result['status_counts'] = ', '.join(status)
  38
  39
  40     def backward_compatibility(self, action, argv):
  41         """ 'job create --clone' became 'job clone --id' """
  42         if action == 'create':
  43             for option in ['-l', '--clone']:
  44                 if option in argv:
  45                     argv[argv.index(option)] = '--id'
  46                     action = 'clone'
  47         return action
  48
  49
  50 class job_help(job):
  51     """Just here to get the atest logic working.
  52     Usage is set by its parent"""
  53     pass
  54
  55
  56 class job_list_stat(action_common.atest_list, job):
  57     def __init__(self):
  58         super(job_list_stat, self).__init__()
  59
  60         self.topic_parse_info = topic_common.item_parse_info(
  61             attribute_name='jobs',
  62             use_leftover=True)
  63
  64
  65     def __split_jobs_between_ids_names(self):
  66         job_ids = []
  67         job_names = []
  68
  69         # Sort between job IDs and names
  70         for job_id in self.jobs:
  71             if job_id.isdigit():
  72                 job_ids.append(job_id)
  73             else:
  74                 job_names.append(job_id)
  75         return (job_ids, job_names)
  76
  77
  78     def execute_on_ids_and_names(self, op, filters={},
  79                                  check_results={'id__in': 'id',
  80                                                 'name__in': 'id'},
  81                                  tag_id='id__in', tag_name='name__in'):
  82         if not self.jobs:
  83             # Want everything
  84             return super(job_list_stat, self).execute(op=op, filters=filters)
  85
  86         all_jobs = []
  87         (job_ids, job_names) = self.__split_jobs_between_ids_names()
  88
  89         for items, tag in [(job_ids, tag_id),
  90                           (job_names, tag_name)]:
  91             if items:
  92                 new_filters = filters.copy()
  93                 new_filters[tag] = items
  94                 jobs = super(job_list_stat,
  95                              self).execute(op=op,
  96                                            filters=new_filters,
  97                                            check_results=check_results)
  98                 all_jobs.extend(jobs)
  99
 100         return all_jobs
 101
 102
 103 class job_list(job_list_stat):
 104     """atest job list [<jobs>] [--all] [--running] [--user <username>]"""
 105     def __init__(self):
 106         super(job_list, self).__init__()
 107         self.parser.add_option('-a', '--all', help='List jobs for all '
 108                                'users.', action='store_true', default=False)
 109         self.parser.add_option('-r', '--running', help='List only running '
 110                                'jobs', action='store_true')
 111         self.parser.add_option('-u', '--user', help='List jobs for given '
 112                                'user', type='string')
 113
 114
 115     def parse(self):
 116         options, leftover = super(job_list, self).parse()
 117         self.all = options.all
 118         self.data['running'] = options.running
 119         if options.user:
 120             if options.all:
 121                 self.invalid_syntax('Only specify --all or --user, not both.')
 122             else:
 123                 self.data['owner'] = options.user
 124         elif not options.all and not self.jobs:
 125             self.data['owner'] = getpass.getuser()
 126
 127         return options, leftover
 128
 129
 130     def execute(self):
 131         return self.execute_on_ids_and_names(op='get_jobs_summary',
 132                                              filters=self.data)
 133
 134
 135     def output(self, results):
 136         keys = ['id', 'owner', 'name', 'status_counts']
 137         if self.verbose:
 138             keys.extend(['priority', 'control_type', 'created_on'])
 139         self._convert_status(results)
 140         super(job_list, self).output(results, keys)
 141
 142
 143
 144 class job_stat(job_list_stat):
 145     """atest job stat <job>"""
 146     usage_action = 'stat'
 147
 148     def __init__(self):
 149         super(job_stat, self).__init__()
 150         self.parser.add_option('-f', '--control-file',
 151                                help='Display the control file',
 152                                action='store_true', default=False)
 153         self.parser.add_option('-N', '--list-hosts',
 154                                help='Display only a list of hosts',
 155                                action='store_true')
 156         self.parser.add_option('-s', '--list-hosts-status',
 157                                help='Display only the hosts in these statuses '
 158                                'for a job.', action='store')
 159
 160
 161     def parse(self):
 162         status_list = topic_common.item_parse_info(
 163                 attribute_name='status_list',
 164                 inline_option='list_hosts_status')
 165         options, leftover = super(job_stat, self).parse([status_list],
 166                                                         req_items='jobs')
 167
 168         if not self.jobs:
 169             self.invalid_syntax('Must specify at least one job.')
 170
 171         self.show_control_file = options.control_file
 172         self.list_hosts = options.list_hosts
 173
 174         if self.list_hosts and self.status_list:
 175             self.invalid_syntax('--list-hosts is implicit when using '
 176                                 '--list-hosts-status.')
 177         if len(self.jobs) > 1 and (self.list_hosts or self.status_list):
 178             self.invalid_syntax('--list-hosts and --list-hosts-status should '
 179                                 'only be used on a single job.')
 180
 181         return options, leftover
 182
 183
 184     def _merge_results(self, summary, qes):
 185         hosts_status = {}
 186         for qe in qes:
 187             if qe['host']:
 188                 job_id = qe['job']['id']
 189                 hostname = qe['host']['hostname']
 190                 hosts_status.setdefault(job_id,
 191                                         {}).setdefault(qe['status'],
 192                                                        []).append(hostname)
 193
 194         for job in summary:
 195             job_id = job['id']
 196             if hosts_status.has_key(job_id):
 197                 this_job = hosts_status[job_id]
 198                 job['hosts'] = ' '.join(' '.join(host) for host in
 199                                         this_job.itervalues())
 200                 host_per_status = ['%s="%s"' %(status, ' '.join(host))
 201                                    for status, host in this_job.iteritems()]
 202                 job['hosts_status'] = ', '.join(host_per_status)
 203                 if self.status_list:
 204                     statuses = set(s.lower() for s in self.status_list)
 205                     all_hosts = [s for s in host_per_status if s.split('=',
 206                                  1)[0].lower() in statuses]
 207                     job['hosts_selected_status'] = '\n'.join(all_hosts)
 208             else:
 209                 job['hosts_status'] = ''
 210
 211             if not job.get('hosts'):
 212                 self.generic_error('Job has unassigned meta-hosts, '
 213                                    'try again shortly.')
 214
 215         return summary
 216
 217
 218     def execute(self):
 219         summary = self.execute_on_ids_and_names(op='get_jobs_summary')
 220
 221         # Get the real hostnames
 222         qes = self.execute_on_ids_and_names(op='get_host_queue_entries',
 223                                             check_results={},
 224                                             tag_id='job__in',
 225                                             tag_name='job__name__in')
 226
 227         self._convert_status(summary)
 228
 229         return self._merge_results(summary, qes)
 230
 231
 232     def output(self, results):
 233         if self.list_hosts:
 234             keys = ['hosts']
 235         elif self.status_list:
 236             keys = ['hosts_selected_status']
 237         elif not self.verbose:
 238             keys = ['id', 'name', 'priority', 'status_counts', 'hosts_status']
 239         else:
 240             keys = ['id', 'name', 'priority', 'status_counts', 'hosts_status',
 241                     'owner', 'control_type',  'synch_count', 'created_on',
 242                     'run_verify', 'reboot_before', 'reboot_after',
 243                     'parse_failed_repair']
 244
 245         if self.show_control_file:
 246             keys.append('control_file')
 247
 248         super(job_stat, self).output(results, keys)
 249
 250
 251 class job_create_or_clone(action_common.atest_create, job):
 252     """Class containing the code common to the job create and clone actions"""
 253     msg_items = 'job_name'
 254
 255     def __init__(self):
 256         super(job_create_or_clone, self).__init__()
 257         self.hosts = []
 258         self.data_item_key = 'name'
 259         self.parser.add_option('-p', '--priority', help='Job priority (low, '
 260                                'medium, high, urgent), default=medium',
 261                                type='choice', choices=('low', 'medium', 'high',
 262                                'urgent'), default='medium')
 263         self.parser.add_option('-b', '--labels',
 264                                help='Comma separated list of labels '
 265                                'to get machine list from.', default='')
 266         self.parser.add_option('-m', '--machine', help='List of machines to '
 267                                'run on')
 268         self.parser.add_option('-M', '--mlist',
 269                                help='File listing machines to use',
 270                                type='string', metavar='MACHINE_FLIST')
 271         self.parser.add_option('--one-time-hosts',
 272                                help='List of one time hosts')
 273         self.parser.add_option('-e', '--email',
 274                                help='A comma seperated list of '
 275                                'email addresses to notify of job completion',
 276                                default='')
 277
 278
 279     def _parse_hosts(self, args):
 280         """ Parses the arguments to generate a list of hosts and meta_hosts
 281         A host is a regular name, a meta_host is n*label or *label.
 282         These can be mixed on the CLI, and separated by either commas or
 283         spaces, e.g.: 5*Machine_Label host0 5*Machine_Label2,host2 """
 284
 285         hosts = []
 286         meta_hosts = []
 287
 288         for arg in args:
 289             for host in arg.split(','):
 290                 if re.match('^[0-9]+[*]', host):
 291                     num, host = host.split('*', 1)
 292                     meta_hosts += int(num) * [host]
 293                 elif re.match('^[*](\w*)', host):
 294                     meta_hosts += [re.match('^[*](\w*)', host).group(1)]
 295                 elif host != '' and host not in hosts:
 296                     # Real hostname and not a duplicate
 297                     hosts.append(host)
 298
 299         return (hosts, meta_hosts)
 300
 301
 302     def parse(self):
 303         host_info = topic_common.item_parse_info(attribute_name='hosts',
 304                                                  inline_option='machine',
 305                                                  filename_option='mlist')
 306         job_info = topic_common.item_parse_info(attribute_name='jobname',
 307                                                 use_leftover=True)
 308         oth_info = topic_common.item_parse_info(attribute_name='one_time_hosts',
 309                                                 inline_option='one_time_hosts')
 310         label_info = topic_common.item_parse_info(attribute_name='labels',
 311                                                   inline_option='labels')
 312
 313         options, leftover = super(job_create_or_clone,
 314                                   self).parse([host_info, job_info, oth_info,
 315                                                label_info], req_items='jobname')
 316         self.data = {}
 317         if len(self.jobname) > 1:
 318             self.invalid_syntax('Too many arguments specified, only expected '
 319                                 'to receive job name: %s' % self.jobname)
 320         self.jobname = self.jobname[0]
 321
 322         if options.priority:
 323             self.data['priority'] = options.priority.capitalize()
 324
 325         if self.one_time_hosts:
 326             self.data['one_time_hosts'] = self.one_time_hosts
 327
 328         if self.labels:
 329             label_hosts = self.execute_rpc(op='get_hosts',
 330                                            multiple_labels=self.labels)
 331             for host in label_hosts:
 332                 self.hosts.append(host['hostname'])
 333
 334         self.data['name'] = self.jobname
 335
 336         (self.data['hosts'],
 337          self.data['meta_hosts']) = self._parse_hosts(self.hosts)
 338
 339         self.data['email_list'] = options.email
 340
 341         return options, leftover
 342
 343
 344     def create_job(self):
 345         job_id = self.execute_rpc(op='create_job', **self.data)
 346         return ['%s (id %s)' % (self.jobname, job_id)]
 347
 348
 349     def get_items(self):
 350         return [self.jobname]
 351
 352
 353
 354 class job_create(job_create_or_clone):
 355     """atest job create [--priority <Low|Medium|High|Urgent>]
 356     [--synch_count] [--control-file </path/to/cfile>]
 357     [--on-server] [--test <test1,test2>] [--kernel <http://kernel>]
 358     [--mlist </path/to/machinelist>] [--machine <host1 host2 host3>]
 359     [--labels <list of labels of machines to run on>]
 360     [--reboot_before <option>] [--reboot_after <option>]
 361     [--noverify] [--timeout <timeout>] [--max_runtime <max runtime>]
 362     [--one-time-hosts <hosts>] [--email <email>]
 363     [--dependencies <labels this job is dependent on>]
 364     [--atomic_group <atomic group name>] [--parse-failed-repair <option>]
 365     job_name
 366
 367     Creating a job is rather different from the other create operations,
 368     so it only uses the __init__() and output() from its superclass.
 369     """
 370     op_action = 'create'
 371
 372     def __init__(self):
 373         super(job_create, self).__init__()
 374         self.ctrl_file_data = {}
 375         self.parser.add_option('-y', '--synch_count', type=int,
 376                                help='Number of machines to use per autoserv '
 377                                     'execution')
 378         self.parser.add_option('-f', '--control-file',
 379                                help='use this control file', metavar='FILE')
 380         self.parser.add_option('-s', '--server',
 381                                help='This is server-side job',
 382                                action='store_true', default=False)
 383         self.parser.add_option('-t', '--test',
 384                                help='List of tests to run')
 385
 386         self.parser.add_option('-k', '--kernel', help='A comma separated list'
 387                                ' of kernel versions/URLs/filenames to run the'
 388                                ' job on')
 389         self.parser.add_option('--kernel-cmdline', help='A string that will be'
 390                                ' given as cmdline to the booted kernel(s)'
 391                                ' specified by the -k option')
 392
 393         self.parser.add_option('-d', '--dependencies', help='Comma separated '
 394                                'list of labels this job is dependent on.',
 395                                default='')
 396         self.parser.add_option('-G', '--atomic_group', help='Name of an Atomic '
 397                                'Group to schedule this job on.',
 398                                default='')
 399
 400         self.parser.add_option('-B', '--reboot_before',
 401                                help='Whether or not to reboot the machine '
 402                                     'before the job (never/if dirty/always)',
 403                                type='choice',
 404                                choices=('never', 'if dirty', 'always'))
 405         self.parser.add_option('-a', '--reboot_after',
 406                                help='Whether or not to reboot the machine '
 407                                     'after the job (never/if all tests passed/'
 408                                     'always)',
 409                                type='choice',
 410                                choices=('never', 'if all tests passed',
 411                                         'always'))
 412
 413         self.parser.add_option('--parse-failed-repair',
 414                                help='Whether or not to parse failed repair '
 415                                     'results as part of the job',
 416                                type='choice',
 417                                choices=('true', 'false'))
 418         self.parser.add_option('-n', '--noverify',
 419                                help='Do not run verify for job',
 420                                default=False, action='store_true')
 421         self.parser.add_option('-o', '--timeout', help='Job timeout in hours.',
 422                                metavar='TIMEOUT')
 423         self.parser.add_option('--max_runtime',
 424                                help='Job maximum runtime in hours')
 425
 426
 427     @staticmethod
 428     def _get_kernel_data(kernel_list, cmdline):
 429         # the RPC supports cmdline per kernel version in a dictionary
 430         kernels = []
 431         for version in re.split(r'[, ]+', kernel_list):
 432             if not version:
 433                 continue
 434             kernel_info = {'version': version}
 435             if cmdline:
 436                 kernel_info['cmdline'] = cmdline
 437             kernels.append(kernel_info)
 438
 439         return kernels
 440
 441
 442     def parse(self):
 443         options, leftover = super(job_create, self).parse()
 444
 445         if (len(self.hosts) == 0 and not self.one_time_hosts
 446             and not options.labels and not options.atomic_group):
 447             self.invalid_syntax('Must specify at least one machine '
 448                                 'or an atomic group '
 449                                 '(-m, -M, -b, -G or --one-time-hosts).')
 450         if not options.control_file and not options.test:
 451             self.invalid_syntax('Must specify either --test or --control-file'
 452                                 ' to create a job.')
 453         if options.control_file and options.test:
 454             self.invalid_syntax('Can only specify one of --control-file or '
 455                                 '--test, not both.')
 456         if options.kernel:
 457             self.ctrl_file_data['kernel'] = self._get_kernel_data(
 458                     options.kernel, options.kernel_cmdline)
 459             self.ctrl_file_data['do_push_packages'] = True
 460         if options.control_file:
 461             try:
 462                 control_file_f = open(options.control_file)
 463                 try:
 464                     control_file_data = control_file_f.read()
 465                 finally:
 466                     control_file_f.close()
 467             except IOError:
 468                 self.generic_error('Unable to read from specified '
 469                                    'control-file: %s' % options.control_file)
 470             if options.kernel:
 471                 # execute() will pass this to the AFE server to wrap this
 472                 # control file up to include the kernel installation steps.
 473                 self.ctrl_file_data['client_control_file'] = control_file_data
 474             else:
 475                 self.data['control_file'] = control_file_data
 476         if options.test:
 477             if options.server:
 478                 self.invalid_syntax('If you specify tests, then the '
 479                                     'client/server setting is implicit and '
 480                                     'cannot be overriden.')
 481             tests = [t.strip() for t in options.test.split(',') if t.strip()]
 482             self.ctrl_file_data['tests'] = tests
 483
 484
 485         if options.reboot_before:
 486             self.data['reboot_before'] = options.reboot_before.capitalize()
 487         if options.reboot_after:
 488             self.data['reboot_after'] = options.reboot_after.capitalize()
 489         if options.parse_failed_repair:
 490             self.data['parse_failed_repair'] = (
 491                 options.parse_failed_repair == 'true')
 492         if options.noverify:
 493             self.data['run_verify'] = False
 494         if options.timeout:
 495             self.data['timeout'] = options.timeout
 496         if options.max_runtime:
 497             self.data['max_runtime_hrs'] = options.max_runtime
 498
 499         if options.atomic_group:
 500             self.data['atomic_group_name'] = options.atomic_group
 501
 502         deps = options.dependencies.split(',')
 503         deps = [dep.strip() for dep in deps if dep.strip()]
 504         self.data['dependencies'] = deps
 505
 506         if options.synch_count:
 507             self.data['synch_count'] = options.synch_count
 508         if options.server:
 509             self.data['control_type'] = 'Server'
 510         else:
 511             self.data['control_type'] = 'Client'
 512
 513         return options, leftover
 514
 515
 516     def execute(self):
 517         if self.ctrl_file_data:
 518             uploading_kernel = 'kernel' in self.ctrl_file_data
 519             if uploading_kernel:
 520                 default_timeout = socket.getdefaulttimeout()
 521                 socket.setdefaulttimeout(topic_common.UPLOAD_SOCKET_TIMEOUT)
 522                 print 'Uploading Kernel: this may take a while...',
 523                 sys.stdout.flush()
 524             try:
 525                 cf_info = self.execute_rpc(op='generate_control_file',
 526                                            item=self.jobname,
 527                                            **self.ctrl_file_data)
 528             finally:
 529                 if uploading_kernel:
 530                     socket.setdefaulttimeout(default_timeout)
 531
 532             if uploading_kernel:
 533                 print 'Done'
 534             self.data['control_file'] = cf_info['control_file']
 535             if 'synch_count' not in self.data:
 536                 self.data['synch_count'] = cf_info['synch_count']
 537             if cf_info['is_server']:
 538                 self.data['control_type'] = 'Server'
 539             else:
 540                 self.data['control_type'] = 'Client'
 541
 542             # Get the union of the 2 sets of dependencies
 543             deps = set(self.data['dependencies'])
 544             deps = sorted(deps.union(cf_info['dependencies']))
 545             self.data['dependencies'] = list(deps)
 546
 547         if 'synch_count' not in self.data:
 548             self.data['synch_count'] = 1
 549
 550         return self.create_job()
 551
 552
 553 class job_clone(job_create_or_clone):
 554     """atest job clone [--priority <Low|Medium|High|Urgent>]
 555     [--mlist </path/to/machinelist>] [--machine <host1 host2 host3>]
 556     [--labels <list of labels of machines to run on>]
 557     [--one-time-hosts <hosts>] [--email <email>]
 558     job_name
 559
 560     Cloning a job is rather different from the other create operations,
 561     so it only uses the __init__() and output() from its superclass.
 562     """
 563     op_action = 'clone'
 564     usage_action = 'clone'
 565
 566     def __init__(self):
 567         super(job_clone, self).__init__()
 568         self.parser.add_option('-i', '--id', help='Job id to clone',
 569                                default=False,
 570                                metavar='JOB_ID')
 571         self.parser.add_option('-r', '--reuse-hosts',
 572                                help='Use the exact same hosts as the '
 573                                'cloned job.',
 574                                action='store_true', default=False)
 575
 576
 577     def parse(self):
 578         options, leftover = super(job_clone, self).parse()
 579
 580         self.clone_id = options.id
 581         self.reuse_hosts = options.reuse_hosts
 582
 583         host_specified = self.hosts or self.one_time_hosts or options.labels
 584         if self.reuse_hosts and host_specified:
 585             self.invalid_syntax('Cannot specify hosts and reuse the same '
 586                                 'ones as the cloned job.')
 587
 588         if not (self.reuse_hosts or host_specified):
 589             self.invalid_syntax('Must reuse or specify at least one '
 590                                 'machine (-r, -m, -M, -b or '
 591                                 '--one-time-hosts).')
 592
 593         return options, leftover
 594
 595
 596     def execute(self):
 597         clone_info = self.execute_rpc(op='get_info_for_clone',
 598                                       id=self.clone_id,
 599                                       preserve_metahosts=self.reuse_hosts)
 600
 601         # Remove fields from clone data that cannot be reused
 602         for field in ('name', 'created_on', 'id', 'owner'):
 603             del clone_info['job'][field]
 604
 605         # Also remove parameterized_job field, as the feature still is
 606         # incomplete, this tool does not attempt to support it for now,
 607         # it uses a different API function and it breaks create_job()
 608         if clone_info['job'].has_key('parameterized_job'):
 609             del clone_info['job']['parameterized_job']
 610
 611         # Keyword args cannot be unicode strings
 612         self.data.update((str(key), val)
 613                          for key, val in clone_info['job'].iteritems())
 614
 615         if self.reuse_hosts:
 616             # Convert host list from clone info that can be used for job_create
 617             for label, qty in clone_info['meta_host_counts'].iteritems():
 618                 self.data['meta_hosts'].extend([label]*qty)
 619
 620             self.data['hosts'].extend(host['hostname']
 621                                       for host in clone_info['hosts'])
 622
 623         return self.create_job()
 624
 625
 626 class job_abort(job, action_common.atest_delete):
 627     """atest job abort <job(s)>"""
 628     usage_action = op_action = 'abort'
 629     msg_done = 'Aborted'
 630
 631     def parse(self):
 632         job_info = topic_common.item_parse_info(attribute_name='jobids',
 633                                                 use_leftover=True)
 634         options, leftover = super(job_abort, self).parse([job_info],
 635                                                          req_items='jobids')
 636
 637
 638     def execute(self):
 639         data = {'job__id__in': self.jobids}
 640         self.execute_rpc(op='abort_host_queue_entries', **data)
 641         print 'Aborting jobs: %s' % ', '.join(self.jobids)
 642
 643
 644     def get_items(self):
 645         return self.jobids