buildtools/wafadmin/Task.py

   1 #!/usr/bin/env python
   2 # encoding: utf-8
   3 # Thomas Nagy, 2005-2008 (ita)
   4
   5 """
   6 Running tasks in parallel is a simple problem, but in practice it is more complicated:
   7 * dependencies discovered during the build (dynamic task creation)
   8 * dependencies discovered after files are compiled
   9 * the amount of tasks and dependencies (graph size) can be huge
  10
  11 This is why the dependency management is split on three different levels:
  12 1. groups of tasks that run all after another group of tasks
  13 2. groups of tasks that can be run in parallel
  14 3. tasks that can run in parallel, but with possible unknown ad-hoc dependencies
  15
  16 The point #1 represents a strict sequential order between groups of tasks, for example a compiler is produced
  17 and used to compile the rest, whereas #2 and #3 represent partial order constraints where #2 applies to the kind of task
  18 and #3 applies to the task instances.
  19
  20 #1 is held by the task manager: ordered list of TaskGroups (see bld.add_group)
  21 #2 is held by the task groups and the task types: precedence after/before (topological sort),
  22    and the constraints extracted from file extensions
  23 #3 is held by the tasks individually (attribute run_after),
  24    and the scheduler (Runner.py) use Task::runnable_status to reorder the tasks
  25
  26 --
  27
  28 To try, use something like this in your code:
  29 import Constants, Task
  30 Task.algotype = Constants.MAXPARALLEL
  31
  32 --
  33
  34 There are two concepts with the tasks (individual units of change):
  35 * dependency (if 1 is recompiled, recompile 2)
  36 * order (run 2 after 1)
  37
  38 example 1: if t1 depends on t2 and t2 depends on t3 it is not necessary to make t1 depend on t3 (dependency is transitive)
  39 example 2: if t1 depends on a node produced by t2, it is not immediately obvious that t1 must run after t2 (order is not obvious)
  40
  41 The role of the Task Manager is to give the tasks in order (groups of task that may be run in parallel one after the other)
  42
  43 """
  44
  45 import os, shutil, sys, re, random, datetime, tempfile, shlex
  46 from Utils import md5
  47 import Build, Runner, Utils, Node, Logs, Options
  48 from Logs import debug, warn, error
  49 from Constants import *
  50
  51 algotype = NORMAL
  52 #algotype = JOBCONTROL
  53 #algotype = MAXPARALLEL
  54
  55 COMPILE_TEMPLATE_SHELL = '''
  56 def f(task):
  57         env = task.env
  58         wd = getattr(task, 'cwd', None)
  59         p = env.get_flat
  60         cmd = \'\'\' %s \'\'\' % s
  61         return task.exec_command(cmd, cwd=wd)
  62 '''
  63
  64 COMPILE_TEMPLATE_NOSHELL = '''
  65 def f(task):
  66         env = task.env
  67         wd = getattr(task, 'cwd', None)
  68         def to_list(xx):
  69                 if isinstance(xx, str): return [xx]
  70                 return xx
  71         lst = []
  72         %s
  73         lst = [x for x in lst if x]
  74         return task.exec_command(lst, cwd=wd)
  75 '''
  76
  77
  78 """
  79 Enable different kind of dependency algorithms:
  80 1 make groups: first compile all cpps and then compile all links (NORMAL)
  81 2 parallelize all (each link task run after its dependencies) (MAXPARALLEL)
  82 3 like 1 but provide additional constraints for the parallelization (MAXJOBS)
  83
  84 In theory 1. will be faster than 2 for waf, but might be slower for builds
  85 The scheme 2 will not allow for running tasks one by one so it can cause disk thrashing on huge builds
  86 """
  87
  88 file_deps = Utils.nada
  89 """
  90 Additional dependency pre-check may be added by replacing the function file_deps.
  91 e.g. extract_outputs, extract_deps below.
  92 """
  93
  94 class TaskManager(object):
  95         """The manager is attached to the build object, it holds a list of TaskGroup"""
  96         def __init__(self):
  97                 self.groups = []
  98                 self.tasks_done = []
  99                 self.current_group = 0
 100                 self.groups_names = {}
 101
 102         def group_name(self, g):
 103                 """name for the group g (utility)"""
 104                 if not isinstance(g, TaskGroup):
 105                         g = self.groups[g]
 106                 for x in self.groups_names:
 107                         if id(self.groups_names[x]) == id(g):
 108                                 return x
 109                 return ''
 110
 111         def group_idx(self, tg):
 112                 """group the task generator tg is in"""
 113                 se = id(tg)
 114                 for i in range(len(self.groups)):
 115                         g = self.groups[i]
 116                         for t in g.tasks_gen:
 117                                 if id(t) == se:
 118                                         return i
 119                 return None
 120
 121         def get_next_set(self):
 122                 """return the next set of tasks to execute
 123                 the first parameter is the maximum amount of parallelization that may occur"""
 124                 ret = None
 125                 while not ret and self.current_group < len(self.groups):
 126                         ret = self.groups[self.current_group].get_next_set()
 127                         if ret: return ret
 128                         else:
 129                                 self.groups[self.current_group].process_install()
 130                                 self.current_group += 1
 131                 return (None, None)
 132
 133         def add_group(self, name=None, set=True):
 134                 #if self.groups and not self.groups[0].tasks:
 135                 #       error('add_group: an empty group is already present')
 136                 g = TaskGroup()
 137
 138                 if name and name in self.groups_names:
 139                         error('add_group: name %s already present' % name)
 140                 self.groups_names[name] = g
 141                 self.groups.append(g)
 142                 if set:
 143                         self.current_group = len(self.groups) - 1
 144
 145         def set_group(self, idx):
 146                 if isinstance(idx, str):
 147                         g = self.groups_names[idx]
 148                         for x in xrange(len(self.groups)):
 149                                 if id(g) == id(self.groups[x]):
 150                                         self.current_group = x
 151                 else:
 152                         self.current_group = idx
 153
 154         def add_task_gen(self, tgen):
 155                 if not self.groups: self.add_group()
 156                 self.groups[self.current_group].tasks_gen.append(tgen)
 157
 158         def add_task(self, task):
 159                 if not self.groups: self.add_group()
 160                 self.groups[self.current_group].tasks.append(task)
 161
 162         def total(self):
 163                 total = 0
 164                 if not self.groups: return 0
 165                 for group in self.groups:
 166                         total += len(group.tasks)
 167                 return total
 168
 169         def add_finished(self, tsk):
 170                 self.tasks_done.append(tsk)
 171                 bld = tsk.generator.bld
 172                 if bld.is_install:
 173                         f = None
 174                         if 'install' in tsk.__dict__:
 175                                 f = tsk.__dict__['install']
 176                                 # install=0 to prevent installation
 177                                 if f: f(tsk)
 178                         else:
 179                                 tsk.install()
 180
 181 class TaskGroup(object):
 182         "the compilation of one group does not begin until the previous group has finished (in the manager)"
 183         def __init__(self):
 184                 self.tasks = [] # this list will be consumed
 185                 self.tasks_gen = []
 186
 187                 self.cstr_groups = Utils.DefaultDict(list) # tasks having equivalent constraints
 188                 self.cstr_order = Utils.DefaultDict(set) # partial order between the cstr groups
 189                 self.temp_tasks = [] # tasks put on hold
 190                 self.ready = 0
 191                 self.post_funs = []
 192
 193         def reset(self):
 194                 "clears the state of the object (put back the tasks into self.tasks)"
 195                 for x in self.cstr_groups:
 196                         self.tasks += self.cstr_groups[x]
 197                 self.tasks = self.temp_tasks + self.tasks
 198                 self.temp_tasks = []
 199                 self.cstr_groups = Utils.DefaultDict(list)
 200                 self.cstr_order = Utils.DefaultDict(set)
 201                 self.ready = 0
 202
 203         def process_install(self):
 204                 for (f, k, kw) in self.post_funs:
 205                         f(*k, **kw)
 206
 207         def prepare(self):
 208                 "prepare the scheduling"
 209                 self.ready = 1
 210                 file_deps(self.tasks)
 211                 self.make_cstr_groups()
 212                 self.extract_constraints()
 213
 214         def get_next_set(self):
 215                 "next list of tasks to execute using max job settings, returns (maxjobs, task_list)"
 216                 global algotype
 217                 if algotype == NORMAL:
 218                         tasks = self.tasks_in_parallel()
 219                         maxj = MAXJOBS
 220                 elif algotype == JOBCONTROL:
 221                         (maxj, tasks) = self.tasks_by_max_jobs()
 222                 elif algotype == MAXPARALLEL:
 223                         tasks = self.tasks_with_inner_constraints()
 224                         maxj = MAXJOBS
 225                 else:
 226                         raise Utils.WafError("unknown algorithm type %s" % (algotype))
 227
 228                 if not tasks: return ()
 229                 return (maxj, tasks)
 230
 231         def make_cstr_groups(self):
 232                 "unite the tasks that have similar constraints"
 233                 self.cstr_groups = Utils.DefaultDict(list)
 234                 for x in self.tasks:
 235                         h = x.hash_constraints()
 236                         self.cstr_groups[h].append(x)
 237
 238         def set_order(self, a, b):
 239                 self.cstr_order[a].add(b)
 240
 241         def compare_exts(self, t1, t2):
 242                 "extension production"
 243                 x = "ext_in"
 244                 y = "ext_out"
 245                 in_ = t1.attr(x, ())
 246                 out_ = t2.attr(y, ())
 247                 for k in in_:
 248                         if k in out_:
 249                                 return -1
 250                 in_ = t2.attr(x, ())
 251                 out_ = t1.attr(y, ())
 252                 for k in in_:
 253                         if k in out_:
 254                                 return 1
 255                 return 0
 256
 257         def compare_partial(self, t1, t2):
 258                 "partial relations after/before"
 259                 m = "after"
 260                 n = "before"
 261                 name = t2.__class__.__name__
 262                 if name in Utils.to_list(t1.attr(m, ())): return -1
 263                 elif name in Utils.to_list(t1.attr(n, ())): return 1
 264                 name = t1.__class__.__name__
 265                 if name in Utils.to_list(t2.attr(m, ())): return 1
 266                 elif name in Utils.to_list(t2.attr(n, ())): return -1
 267                 return 0
 268
 269         def extract_constraints(self):
 270                 "extract the parallelization constraints from the tasks with different constraints"
 271                 keys = self.cstr_groups.keys()
 272                 max = len(keys)
 273                 # hopefully the length of this list is short
 274                 for i in xrange(max):
 275                         t1 = self.cstr_groups[keys[i]][0]
 276                         for j in xrange(i + 1, max):
 277                                 t2 = self.cstr_groups[keys[j]][0]
 278
 279                                 # add the constraints based on the comparisons
 280                                 val = (self.compare_exts(t1, t2)
 281                                         or self.compare_partial(t1, t2)
 282                                         )
 283                                 if val > 0:
 284                                         self.set_order(keys[i], keys[j])
 285                                 elif val < 0:
 286                                         self.set_order(keys[j], keys[i])
 287
 288         def tasks_in_parallel(self):
 289                 "(NORMAL) next list of tasks that may be executed in parallel"
 290
 291                 if not self.ready: self.prepare()
 292
 293                 keys = self.cstr_groups.keys()
 294
 295                 unconnected = []
 296                 remainder = []
 297
 298                 for u in keys:
 299                         for k in self.cstr_order.values():
 300                                 if u in k:
 301                                         remainder.append(u)
 302                                         break
 303                         else:
 304                                 unconnected.append(u)
 305
 306                 toreturn = []
 307                 for y in unconnected:
 308                         toreturn.extend(self.cstr_groups[y])
 309
 310                 # remove stuff only after
 311                 for y in unconnected:
 312                                 try: self.cstr_order.__delitem__(y)
 313                                 except KeyError: pass
 314                                 self.cstr_groups.__delitem__(y)
 315
 316                 if not toreturn and remainder:
 317                         raise Utils.WafError("circular order constraint detected %r" % remainder)
 318
 319                 return toreturn
 320
 321         def tasks_by_max_jobs(self):
 322                 "(JOBCONTROL) returns the tasks that can run in parallel with the max amount of jobs"
 323                 if not self.ready: self.prepare()
 324                 if not self.temp_tasks: self.temp_tasks = self.tasks_in_parallel()
 325                 if not self.temp_tasks: return (None, None)
 326
 327                 maxjobs = MAXJOBS
 328                 ret = []
 329                 remaining = []
 330                 for t in self.temp_tasks:
 331                         m = getattr(t, "maxjobs", getattr(self.__class__, "maxjobs", MAXJOBS))
 332                         if m > maxjobs:
 333                                 remaining.append(t)
 334                         elif m < maxjobs:
 335                                 remaining += ret
 336                                 ret = [t]
 337                                 maxjobs = m
 338                         else:
 339                                 ret.append(t)
 340                 self.temp_tasks = remaining
 341                 return (maxjobs, ret)
 342
 343         def tasks_with_inner_constraints(self):
 344                 """(MAXPARALLEL) returns all tasks in this group, but add the constraints on each task instance
 345                 as an optimization, it might be desirable to discard the tasks which do not have to run"""
 346                 if not self.ready: self.prepare()
 347
 348                 if getattr(self, "done", None): return None
 349
 350                 for p in self.cstr_order:
 351                         for v in self.cstr_order[p]:
 352                                 for m in self.cstr_groups[p]:
 353                                         for n in self.cstr_groups[v]:
 354                                                 n.set_run_after(m)
 355                 self.cstr_order = Utils.DefaultDict(set)
 356                 self.cstr_groups = Utils.DefaultDict(list)
 357                 self.done = 1
 358                 return self.tasks[:] # make a copy
 359
 360 class store_task_type(type):
 361         "store the task types that have a name ending in _task into a map (remember the existing task types)"
 362         def __init__(cls, name, bases, dict):
 363                 super(store_task_type, cls).__init__(name, bases, dict)
 364                 name = cls.__name__
 365
 366                 if name.endswith('_task'):
 367                         name = name.replace('_task', '')
 368                 if name != 'TaskBase':
 369                         TaskBase.classes[name] = cls
 370
 371 class TaskBase(object):
 372         """Base class for all Waf tasks
 373
 374         The most important methods are (by usual order of call):
 375         1 runnable_status: ask the task if it should be run, skipped, or if we have to ask later
 376         2 __str__: string to display to the user
 377         3 run: execute the task
 378         4 post_run: after the task is run, update the cache about the task
 379
 380         This class should be seen as an interface, it provides the very minimum necessary for the scheduler
 381         so it does not do much.
 382
 383         For illustration purposes, TaskBase instances try to execute self.fun (if provided)
 384         """
 385
 386         __metaclass__ = store_task_type
 387
 388         color = "GREEN"
 389         maxjobs = MAXJOBS
 390         classes = {}
 391         stat = None
 392
 393         def __init__(self, *k, **kw):
 394                 self.hasrun = NOT_RUN
 395
 396                 try:
 397                         self.generator = kw['generator']
 398                 except KeyError:
 399                         self.generator = self
 400                         self.bld = Build.bld
 401
 402                 if kw.get('normal', 1):
 403                         self.generator.bld.task_manager.add_task(self)
 404
 405         def __repr__(self):
 406                 "used for debugging"
 407                 return '\n\t{task: %s %s}' % (self.__class__.__name__, str(getattr(self, "fun", "")))
 408
 409         def __str__(self):
 410                 "string to display to the user"
 411                 if hasattr(self, 'fun'):
 412                         return 'executing: %s\n' % self.fun.__name__
 413                 return self.__class__.__name__ + '\n'
 414
 415         def exec_command(self, *k, **kw):
 416                 "use this for executing commands from tasks"
 417                 # TODO in waf 1.6, eliminate bld.exec_command, and move the cwd processing to here
 418                 if self.env['env']:
 419                         kw['env'] = self.env['env']
 420                 return self.generator.bld.exec_command(*k, **kw)
 421
 422         def runnable_status(self):
 423                 "RUN_ME SKIP_ME or ASK_LATER"
 424                 return RUN_ME
 425
 426         def can_retrieve_cache(self):
 427                 return False
 428
 429         def call_run(self):
 430                 if self.can_retrieve_cache():
 431                         return 0
 432                 return self.run()
 433
 434         def run(self):
 435                 "called if the task must run"
 436                 if hasattr(self, 'fun'):
 437                         return self.fun(self)
 438                 return 0
 439
 440         def post_run(self):
 441                 "update the dependency tree (node stats)"
 442                 pass
 443
 444         def display(self):
 445                 "print either the description (using __str__) or the progress bar or the ide output"
 446                 col1 = Logs.colors(self.color)
 447                 col2 = Logs.colors.NORMAL
 448
 449                 if Options.options.progress_bar == 1:
 450                         return self.generator.bld.progress_line(self.position[0], self.position[1], col1, col2)
 451
 452                 if Options.options.progress_bar == 2:
 453                         ela = Utils.get_elapsed_time(self.generator.bld.ini)
 454                         try:
 455                                 ins  = ','.join([n.name for n in self.inputs])
 456                         except AttributeError:
 457                                 ins = ''
 458                         try:
 459                                 outs = ','.join([n.name for n in self.outputs])
 460                         except AttributeError:
 461                                 outs = ''
 462                         return '|Total %s|Current %s|Inputs %s|Outputs %s|Time %s|\n' % (self.position[1], self.position[0], ins, outs, ela)
 463
 464                 total = self.position[1]
 465                 n = len(str(total))
 466                 fs = '[%%%dd/%%%dd] %%s%%s%%s' % (n, n)
 467                 return fs % (self.position[0], self.position[1], col1, str(self), col2)
 468
 469         def attr(self, att, default=None):
 470                 "retrieve an attribute from the instance or from the class (microoptimization here)"
 471                 ret = getattr(self, att, self)
 472                 if ret is self: return getattr(self.__class__, att, default)
 473                 return ret
 474
 475         def hash_constraints(self):
 476                 "identify a task type for all the constraints relevant for the scheduler: precedence, file production"
 477                 a = self.attr
 478                 sum = hash((self.__class__.__name__,
 479                         str(a('before', '')),
 480                         str(a('after', '')),
 481                         str(a('ext_in', '')),
 482                         str(a('ext_out', '')),
 483                         self.__class__.maxjobs))
 484                 return sum
 485
 486         def format_error(self):
 487                 "error message to display to the user (when a build fails)"
 488                 if getattr(self, "err_msg", None):
 489                         return self.err_msg
 490                 elif self.hasrun == CRASHED:
 491                         try:
 492                                 return " -> task failed (err #%d): %r" % (self.err_code, self)
 493                         except AttributeError:
 494                                 return " -> task failed: %r" % self
 495                 elif self.hasrun == MISSING:
 496                         return " -> missing files: %r" % self
 497                 else:
 498                         return ''
 499
 500         def install(self):
 501                 """
 502                 installation is performed by looking at the task attributes:
 503                 * install_path: installation path like "${PREFIX}/bin"
 504                 * filename: install the first node in the outputs as a file with a particular name, be certain to give os.sep
 505                 * chmod: permissions
 506                 """
 507                 bld = self.generator.bld
 508                 d = self.attr('install')
 509
 510                 if self.attr('install_path'):
 511                         lst = [a.relpath_gen(bld.srcnode) for a in self.outputs]
 512                         perm = self.attr('chmod', O644)
 513                         if self.attr('src'):
 514                                 # if src is given, install the sources too
 515                                 lst += [a.relpath_gen(bld.srcnode) for a in self.inputs]
 516                         if self.attr('filename'):
 517                                 dir = self.install_path.rstrip(os.sep) + os.sep + self.attr('filename')
 518                                 bld.install_as(dir, lst[0], self.env, perm)
 519                         else:
 520                                 bld.install_files(self.install_path, lst, self.env, perm)
 521
 522 class Task(TaskBase):
 523         """The parent class is quite limited, in this version:
 524         * file system interaction: input and output nodes
 525         * persistence: do not re-execute tasks that have already run
 526         * caching: same files can be saved and retrieved from a cache directory
 527         * dependencies:
 528                 implicit, like .c files depending on .h files
 529                 explicit, like the input nodes or the dep_nodes
 530                 environment variables, like the CXXFLAGS in self.env
 531         """
 532         vars = []
 533         def __init__(self, env, **kw):
 534                 TaskBase.__init__(self, **kw)
 535                 self.env = env
 536
 537                 # inputs and outputs are nodes
 538                 # use setters when possible
 539                 self.inputs  = []
 540                 self.outputs = []
 541
 542                 self.dep_nodes = []
 543                 self.run_after = []
 544
 545                 # Additionally, you may define the following
 546                 #self.dep_vars  = 'PREFIX DATADIR'
 547
 548         def __str__(self):
 549                 "string to display to the user"
 550                 env = self.env
 551                 src_str = ' '.join([a.nice_path(env) for a in self.inputs])
 552                 tgt_str = ' '.join([a.nice_path(env) for a in self.outputs])
 553                 if self.outputs: sep = ' -> '
 554                 else: sep = ''
 555                 return '%s: %s%s%s\n' % (self.__class__.__name__.replace('_task', ''), src_str, sep, tgt_str)
 556
 557         def __repr__(self):
 558                 return "".join(['\n\t{task: ', self.__class__.__name__, " ", ",".join([x.name for x in self.inputs]), " -> ", ",".join([x.name for x in self.outputs]), '}'])
 559
 560         def unique_id(self):
 561                 "get a unique id: hash the node paths, the variant, the class, the function"
 562                 try:
 563                         return self.uid
 564                 except AttributeError:
 565                         "this is not a real hot zone, but we want to avoid surprizes here"
 566                         m = md5()
 567                         up = m.update
 568                         up(self.__class__.__name__)
 569                         up(self.env.variant())
 570                         p = None
 571                         for x in self.inputs + self.outputs:
 572                                 if p != x.parent.id:
 573                                         p = x.parent.id
 574                                         up(x.parent.abspath())
 575                                 up(x.name)
 576                         self.uid = m.digest()
 577                         return self.uid
 578
 579         def set_inputs(self, inp):
 580                 if isinstance(inp, list): self.inputs += inp
 581                 else: self.inputs.append(inp)
 582
 583         def set_outputs(self, out):
 584                 if isinstance(out, list): self.outputs += out
 585                 else: self.outputs.append(out)
 586
 587         def set_run_after(self, task):
 588                 "set (scheduler) order on another task"
 589                 # TODO: handle list or object
 590                 assert isinstance(task, TaskBase)
 591                 self.run_after.append(task)
 592
 593         def add_file_dependency(self, filename):
 594                 "TODO user-provided file dependencies"
 595                 node = self.generator.bld.path.find_resource(filename)
 596                 self.dep_nodes.append(node)
 597
 598         def signature(self):
 599                 # compute the result one time, and suppose the scan_signature will give the good result
 600                 try: return self.cache_sig[0]
 601                 except AttributeError: pass
 602
 603                 self.m = md5()
 604
 605                 # explicit deps
 606                 exp_sig = self.sig_explicit_deps()
 607
 608                 # env vars
 609                 var_sig = self.sig_vars()
 610
 611                 # implicit deps
 612
 613                 imp_sig = SIG_NIL
 614                 if self.scan:
 615                         try:
 616                                 imp_sig = self.sig_implicit_deps()
 617                         except ValueError:
 618                                 return self.signature()
 619
 620                 # we now have the signature (first element) and the details (for debugging)
 621                 ret = self.m.digest()
 622                 self.cache_sig = (ret, exp_sig, imp_sig, var_sig)
 623                 return ret
 624
 625         def runnable_status(self):
 626                 "SKIP_ME RUN_ME or ASK_LATER"
 627                 #return 0 # benchmarking
 628
 629                 if self.inputs and (not self.outputs):
 630                         if not getattr(self.__class__, 'quiet', None):
 631                                 warn("invalid task (no inputs OR outputs): override in a Task subclass or set the attribute 'quiet' %r" % self)
 632
 633                 for t in self.run_after:
 634                         if not t.hasrun:
 635                                 return ASK_LATER
 636
 637                 env = self.env
 638                 bld = self.generator.bld
 639
 640                 # first compute the signature
 641                 new_sig = self.signature()
 642
 643                 # compare the signature to a signature computed previously
 644                 key = self.unique_id()
 645                 try:
 646                         prev_sig = bld.task_sigs[key][0]
 647                 except KeyError:
 648                         debug("task: task %r must run as it was never run before or the task code changed", self)
 649                         return RUN_ME
 650
 651                 # compare the signatures of the outputs
 652                 for node in self.outputs:
 653                         variant = node.variant(env)
 654                         try:
 655                                 if bld.node_sigs[variant][node.id] != new_sig:
 656                                         return RUN_ME
 657                         except KeyError:
 658                                 debug("task: task %r must run as the output nodes do not exist", self)
 659                                 return RUN_ME
 660
 661                 # debug if asked to
 662                 if Logs.verbose: self.debug_why(bld.task_sigs[key])
 663
 664                 if new_sig != prev_sig:
 665                         return RUN_ME
 666                 return SKIP_ME
 667
 668         def post_run(self):
 669                 "called after a successful task run"
 670                 bld = self.generator.bld
 671                 env = self.env
 672                 sig = self.signature()
 673                 ssig = sig.encode('hex')
 674
 675                 variant = env.variant()
 676                 for node in self.outputs:
 677                         # check if the node exists ..
 678                         try:
 679                                 os.stat(node.abspath(env))
 680                         except OSError:
 681                                 self.hasrun = MISSING
 682                                 self.err_msg = '-> missing file: %r' % node.abspath(env)
 683                                 raise Utils.WafError
 684
 685                         # important, store the signature for the next run
 686                         bld.node_sigs[variant][node.id] = sig
 687                 bld.task_sigs[self.unique_id()] = self.cache_sig
 688
 689                 # file caching, if possible
 690                 # try to avoid data corruption as much as possible
 691                 if not Options.cache_global or Options.options.nocache or not self.outputs:
 692                         return None
 693
 694                 if getattr(self, 'cached', None):
 695                         return None
 696
 697                 dname = os.path.join(Options.cache_global, ssig)
 698                 tmpdir = tempfile.mkdtemp(prefix=Options.cache_global + os.sep + 'waf')
 699
 700                 try:
 701                         shutil.rmtree(dname)
 702                 except:
 703                         pass
 704
 705                 try:
 706                         i = 0
 707                         for node in self.outputs:
 708                                 variant = node.variant(env)
 709                                 dest = os.path.join(tmpdir, str(i) + node.name)
 710                                 shutil.copy2(node.abspath(env), dest)
 711                                 i += 1
 712                 except (OSError, IOError):
 713                         try:
 714                                 shutil.rmtree(tmpdir)
 715                         except:
 716                                 pass
 717                 else:
 718                         try:
 719                                 os.rename(tmpdir, dname)
 720                         except OSError:
 721                                 try:
 722                                         shutil.rmtree(tmpdir)
 723                                 except:
 724                                         pass
 725                         else:
 726                                 try:
 727                                         os.chmod(dname, O755)
 728                                 except:
 729                                         pass
 730
 731         def can_retrieve_cache(self):
 732                 """
 733                 Retrieve build nodes from the cache
 734                 update the file timestamps to help cleaning the least used entries from the cache
 735                 additionally, set an attribute 'cached' to avoid re-creating the same cache files
 736
 737                 suppose there are files in cache/dir1/file1 and cache/dir2/file2
 738                 first, read the timestamp of dir1
 739                 then try to copy the files
 740                 then look at the timestamp again, if it has changed, the data may have been corrupt (cache update by another process)
 741                 should an exception occur, ignore the data
 742                 """
 743                 if not Options.cache_global or Options.options.nocache or not self.outputs:
 744                         return None
 745
 746                 env = self.env
 747                 sig = self.signature()
 748                 ssig = sig.encode('hex')
 749
 750                 # first try to access the cache folder for the task
 751                 dname = os.path.join(Options.cache_global, ssig)
 752                 try:
 753                         t1 = os.stat(dname).st_mtime
 754                 except OSError:
 755                         return None
 756
 757                 i = 0
 758                 for node in self.outputs:
 759                         variant = node.variant(env)
 760
 761                         orig = os.path.join(dname, str(i) + node.name)
 762                         try:
 763                                 shutil.copy2(orig, node.abspath(env))
 764                                 # mark the cache file as used recently (modified)
 765                                 os.utime(orig, None)
 766                         except (OSError, IOError):
 767                                 debug('task: failed retrieving file')
 768                                 return None
 769                         i += 1
 770
 771                 # is it the same folder?
 772                 try:
 773                         t2 = os.stat(dname).st_mtime
 774                 except OSError:
 775                         return None
 776
 777                 if t1 != t2:
 778                         return None
 779
 780                 for node in self.outputs:
 781                         self.generator.bld.node_sigs[variant][node.id] = sig
 782                         if Options.options.progress_bar < 1:
 783                                 self.generator.bld.printout('restoring from cache %r\n' % node.bldpath(env))
 784
 785                 self.cached = True
 786                 return 1
 787
 788         def debug_why(self, old_sigs):
 789                 "explains why a task is run"
 790
 791                 new_sigs = self.cache_sig
 792                 def v(x):
 793                         return x.encode('hex')
 794
 795                 debug("Task %r", self)
 796                 msgs = ['Task must run', '* Source file or manual dependency', '* Implicit dependency', '* Environment variable']
 797                 tmp = 'task: -> %s: %s %s'
 798                 for x in xrange(len(msgs)):
 799                         if (new_sigs[x] != old_sigs[x]):
 800                                 debug(tmp, msgs[x], v(old_sigs[x]), v(new_sigs[x]))
 801
 802         def sig_explicit_deps(self):
 803                 bld = self.generator.bld
 804                 up = self.m.update
 805
 806                 # the inputs
 807                 for x in self.inputs + getattr(self, 'dep_nodes', []):
 808                         if not x.parent.id in bld.cache_scanned_folders:
 809                                 bld.rescan(x.parent)
 810
 811                         variant = x.variant(self.env)
 812                         try:
 813                                 up(bld.node_sigs[variant][x.id])
 814                         except KeyError:
 815                                 raise Utils.WafError('Missing node signature for %r (required by %r)' % (x, self))
 816
 817                 # manual dependencies, they can slow down the builds
 818                 if bld.deps_man:
 819                         additional_deps = bld.deps_man
 820                         for x in self.inputs + self.outputs:
 821                                 try:
 822                                         d = additional_deps[x.id]
 823                                 except KeyError:
 824                                         continue
 825
 826                                 for v in d:
 827                                         if isinstance(v, Node.Node):
 828                                                 bld.rescan(v.parent)
 829                                                 variant = v.variant(self.env)
 830                                                 try:
 831                                                         v = bld.node_sigs[variant][v.id]
 832                                                 except KeyError:
 833                                                         raise Utils.WafError('Missing node signature for %r (required by %r)' % (v, self))
 834                                         elif hasattr(v, '__call__'):
 835                                                 v = v() # dependency is a function, call it
 836                                         up(v)
 837
 838                 for x in self.dep_nodes:
 839                         v = bld.node_sigs[x.variant(self.env)][x.id]
 840                         up(v)
 841
 842                 return self.m.digest()
 843
 844         def sig_vars(self):
 845                 bld = self.generator.bld
 846                 env = self.env
 847
 848                 # dependencies on the environment vars
 849                 act_sig = bld.hash_env_vars(env, self.__class__.vars)
 850                 self.m.update(act_sig)
 851
 852                 # additional variable dependencies, if provided
 853                 dep_vars = getattr(self, 'dep_vars', None)
 854                 if dep_vars:
 855                         self.m.update(bld.hash_env_vars(env, dep_vars))
 856
 857                 return self.m.digest()
 858
 859         #def scan(self, node):
 860         #       """this method returns a tuple containing:
 861         #       * a list of nodes corresponding to real files
 862         #       * a list of names for files not found in path_lst
 863         #       the input parameters may have more parameters that the ones used below
 864         #       """
 865         #       return ((), ())
 866         scan = None
 867
 868         # compute the signature, recompute it if there is no match in the cache
 869         def sig_implicit_deps(self):
 870                 "the signature obtained may not be the one if the files have changed, we do it in two steps"
 871
 872                 bld = self.generator.bld
 873
 874                 # get the task signatures from previous runs
 875                 key = self.unique_id()
 876                 prev_sigs = bld.task_sigs.get(key, ())
 877                 if prev_sigs:
 878                         try:
 879                                 # for issue #379
 880                                 if prev_sigs[2] == self.compute_sig_implicit_deps():
 881                                         return prev_sigs[2]
 882                         except (KeyError, OSError):
 883                                 pass
 884                         del bld.task_sigs[key]
 885                         raise ValueError('rescan')
 886
 887                 # no previous run or the signature of the dependencies has changed, rescan the dependencies
 888                 (nodes, names) = self.scan()
 889                 if Logs.verbose:
 890                         debug('deps: scanner for %s returned %s %s', str(self), str(nodes), str(names))
 891
 892                 # store the dependencies in the cache
 893                 bld.node_deps[key] = nodes
 894                 bld.raw_deps[key] = names
 895
 896                 # recompute the signature and return it
 897                 try:
 898                         sig = self.compute_sig_implicit_deps()
 899                 except KeyError:
 900                         try:
 901                                 nodes = []
 902                                 for k in bld.node_deps.get(self.unique_id(), []):
 903                                         if k.id & 3 == 2: # Node.FILE:
 904                                                 if not k.id in bld.node_sigs[0]:
 905                                                         nodes.append(k)
 906                                         else:
 907                                                 if not k.id in bld.node_sigs[self.env.variant()]:
 908                                                         nodes.append(k)
 909                         except:
 910                                 nodes = '?'
 911                         raise Utils.WafError('Missing node signature for %r (for implicit dependencies %r)' % (nodes, self))
 912
 913                 return sig
 914
 915         def compute_sig_implicit_deps(self):
 916                 """it is intended for .cpp and inferred .h files
 917                 there is a single list (no tree traversal)
 918                 this is the hot spot so ... do not touch"""
 919                 upd = self.m.update
 920
 921                 bld = self.generator.bld
 922                 tstamp = bld.node_sigs
 923                 env = self.env
 924
 925                 for k in bld.node_deps.get(self.unique_id(), []):
 926                         # unlikely but necessary if it happens
 927                         if not k.parent.id in bld.cache_scanned_folders:
 928                                 # if the parent folder is removed, an OSError may be thrown
 929                                 bld.rescan(k.parent)
 930
 931                         # if the parent folder is removed, a KeyError will be thrown
 932                         if k.id & 3 == 2: # Node.FILE:
 933                                 upd(tstamp[0][k.id])
 934                         else:
 935                                 upd(tstamp[env.variant()][k.id])
 936
 937                 return self.m.digest()
 938
 939 def funex(c):
 940         dc = {}
 941         exec(c, dc)
 942         return dc['f']
 943
 944 reg_act = re.compile(r"(?P<backslash>\\)|(?P<dollar>\$\$)|(?P<subst>\$\{(?P<var>\w+)(?P<code>.*?)\})", re.M)
 945 def compile_fun_shell(name, line):
 946         """Compiles a string (once) into a function, eg:
 947         simple_task_type('c++', '${CXX} -o ${TGT[0]} ${SRC} -I ${SRC[0].parent.bldpath()}')
 948
 949         The env variables (CXX, ..) on the task must not hold dicts (order)
 950         The reserved keywords TGT and SRC represent the task input and output nodes
 951
 952         quick test:
 953         bld(source='wscript', rule='echo "foo\\${SRC[0].name}\\bar"')
 954         """
 955
 956         extr = []
 957         def repl(match):
 958                 g = match.group
 959                 if g('dollar'): return "$"
 960                 elif g('backslash'): return '\\\\'
 961                 elif g('subst'): extr.append((g('var'), g('code'))); return "%s"
 962                 return None
 963
 964         line = reg_act.sub(repl, line) or line
 965
 966         parm = []
 967         dvars = []
 968         app = parm.append
 969         for (var, meth) in extr:
 970                 if var == 'SRC':
 971                         if meth: app('task.inputs%s' % meth)
 972                         else: app('" ".join([a.srcpath(env) for a in task.inputs])')
 973                 elif var == 'TGT':
 974                         if meth: app('task.outputs%s' % meth)
 975                         else: app('" ".join([a.bldpath(env) for a in task.outputs])')
 976                 else:
 977                         if not var in dvars: dvars.append(var)
 978                         app("p('%s')" % var)
 979         if parm: parm = "%% (%s) " % (',\n\t\t'.join(parm))
 980         else: parm = ''
 981
 982         c = COMPILE_TEMPLATE_SHELL % (line, parm)
 983
 984         debug('action: %s', c)
 985         return (funex(c), dvars)
 986
 987 def compile_fun_noshell(name, line):
 988
 989         extr = []
 990         def repl(match):
 991                 g = match.group
 992                 if g('dollar'): return "$"
 993                 elif g('subst'): extr.append((g('var'), g('code'))); return "<<|@|>>"
 994                 return None
 995
 996         line2 = reg_act.sub(repl, line)
 997         params = line2.split('<<|@|>>')
 998
 999         buf = []
1000         dvars = []
1001         app = buf.append
1002         for x in xrange(len(extr)):
1003                 params[x] = params[x].strip()
1004                 if params[x]:
1005                         app("lst.extend(%r)" % params[x].split())
1006                 (var, meth) = extr[x]
1007                 if var == 'SRC':
1008                         if meth: app('lst.append(task.inputs%s)' % meth)
1009                         else: app("lst.extend([a.srcpath(env) for a in task.inputs])")
1010                 elif var == 'TGT':
1011                         if meth: app('lst.append(task.outputs%s)' % meth)
1012                         else: app("lst.extend([a.bldpath(env) for a in task.outputs])")
1013                 else:
1014                         app('lst.extend(to_list(env[%r]))' % var)
1015                         if not var in dvars: dvars.append(var)
1016
1017         if params[-1]:
1018                 app("lst.extend(%r)" % shlex.split(params[-1]))
1019
1020         fun = COMPILE_TEMPLATE_NOSHELL % "\n\t".join(buf)
1021         debug('action: %s', fun)
1022         return (funex(fun), dvars)
1023
1024 def compile_fun(name, line, shell=None):
1025         "commands can be launched by the shell or not"
1026         if line.find('<') > 0 or line.find('>') > 0 or line.find('&&') > 0:
1027                 shell = True
1028         #else:
1029         #       shell = False
1030
1031         if shell is None:
1032                 if sys.platform == 'win32':
1033                         shell = False
1034                 else:
1035                         shell = True
1036
1037         if shell:
1038                 return compile_fun_shell(name, line)
1039         else:
1040                 return compile_fun_noshell(name, line)
1041
1042 def simple_task_type(name, line, color='GREEN', vars=[], ext_in=[], ext_out=[], before=[], after=[], shell=None):
1043         """return a new Task subclass with the function run compiled from the line given"""
1044         (fun, dvars) = compile_fun(name, line, shell)
1045         fun.code = line
1046         return task_type_from_func(name, fun, vars or dvars, color, ext_in, ext_out, before, after)
1047
1048 def task_type_from_func(name, func, vars=[], color='GREEN', ext_in=[], ext_out=[], before=[], after=[]):
1049         """return a new Task subclass with the function run compiled from the line given"""
1050         params = {
1051                 'run': func,
1052                 'vars': vars,
1053                 'color': color,
1054                 'name': name,
1055                 'ext_in': Utils.to_list(ext_in),
1056                 'ext_out': Utils.to_list(ext_out),
1057                 'before': Utils.to_list(before),
1058                 'after': Utils.to_list(after),
1059         }
1060
1061         cls = type(Task)(name, (Task,), params)
1062         TaskBase.classes[name] = cls
1063         return cls
1064
1065 def always_run(cls):
1066         """Set all task instances of this class to be executed whenever a build is started
1067         The task signature is calculated, but the result of the comparation between
1068         task signatures is bypassed
1069         """
1070         old = cls.runnable_status
1071         def always(self):
1072                 ret = old(self)
1073                 if ret == SKIP_ME:
1074                         return RUN_ME
1075                 return ret
1076         cls.runnable_status = always
1077
1078 def update_outputs(cls):
1079         """When a command is always run, it is possible that the output only change
1080         sometimes. By default the build node have as a hash the signature of the task
1081         which may not change. With this, the output nodes (produced) are hashed,
1082         and the hashes are set to the build nodes
1083
1084         This may avoid unnecessary recompilations, but it uses more resources
1085         (hashing the output files) so it is not used by default
1086         """
1087         old_post_run = cls.post_run
1088         def post_run(self):
1089                 old_post_run(self)
1090                 bld = self.generator.bld
1091                 for output in self.outputs:
1092                         bld.node_sigs[self.env.variant()][output.id] = Utils.h_file(output.abspath(self.env))
1093                         bld.task_sigs[output.id] = self.unique_id()
1094         cls.post_run = post_run
1095
1096         old_runnable_status = cls.runnable_status
1097         def runnable_status(self):
1098                 status = old_runnable_status(self)
1099                 if status != RUN_ME:
1100                         return status
1101
1102                 uid = self.unique_id()
1103                 try:
1104                         bld = self.outputs[0].__class__.bld
1105                         new_sig  = self.signature()
1106                         prev_sig = bld.task_sigs[uid][0]
1107                         if prev_sig == new_sig:
1108                                 for x in self.outputs:
1109                                         if not x.id in bld.node_sigs[self.env.variant()]:
1110                                                 return RUN_ME
1111                                         if bld.task_sigs[x.id] != uid: # ensure the outputs are associated with *this* task
1112                                                 return RUN_ME
1113                                 return SKIP_ME
1114                 except KeyError:
1115                         pass
1116                 except IndexError:
1117                         pass
1118                 return RUN_ME
1119         cls.runnable_status = runnable_status
1120
1121 def extract_outputs(tasks):
1122         """file_deps: Infer additional dependencies from task input and output nodes
1123         """
1124         v = {}
1125         for x in tasks:
1126                 try:
1127                         (ins, outs) = v[x.env.variant()]
1128                 except KeyError:
1129                         ins = {}
1130                         outs = {}
1131                         v[x.env.variant()] = (ins, outs)
1132
1133                 for a in getattr(x, 'inputs', []):
1134                         try: ins[a.id].append(x)
1135                         except KeyError: ins[a.id] = [x]
1136                 for a in getattr(x, 'outputs', []):
1137                         try: outs[a.id].append(x)
1138                         except KeyError: outs[a.id] = [x]
1139
1140         for (ins, outs) in v.values():
1141                 links = set(ins.iterkeys()).intersection(outs.iterkeys())
1142                 for k in links:
1143                         for a in ins[k]:
1144                                 for b in outs[k]:
1145                                         a.set_run_after(b)
1146
1147 def extract_deps(tasks):
1148         """file_deps: Infer additional dependencies from task input and output nodes and from implicit dependencies
1149         returned by the scanners - that will only work if all tasks are created
1150
1151         this is aimed at people who have pathological builds and who do not care enough
1152         to implement the build dependencies properly
1153
1154         with two loops over the list of tasks, do not expect this to be really fast
1155         """
1156
1157         # first reuse the function above
1158         extract_outputs(tasks)
1159
1160         # map the output nodes to the tasks producing them
1161         out_to_task = {}
1162         for x in tasks:
1163                 v = x.env.variant()
1164                 try:
1165                         lst = x.outputs
1166                 except AttributeError:
1167                         pass
1168                 else:
1169                         for node in lst:
1170                                 out_to_task[(v, node.id)] = x
1171
1172         # map the dependencies found to the tasks compiled
1173         dep_to_task = {}
1174         for x in tasks:
1175                 try:
1176                         x.signature()
1177                 except: # this is on purpose
1178                         pass
1179
1180                 v = x.env.variant()
1181                 key = x.unique_id()
1182                 for k in x.generator.bld.node_deps.get(x.unique_id(), []):
1183                         try: dep_to_task[(v, k.id)].append(x)
1184                         except KeyError: dep_to_task[(v, k.id)] = [x]
1185
1186         # now get the intersection
1187         deps = set(dep_to_task.keys()).intersection(set(out_to_task.keys()))
1188
1189         # and add the dependencies from task to task
1190         for idx in deps:
1191                 for k in dep_to_task[idx]:
1192                         k.set_run_after(out_to_task[idx])
1193
1194         # cleanup, remove the signatures
1195         for x in tasks:
1196                 try:
1197                         delattr(x, 'cache_sig')
1198                 except AttributeError:
1199                         pass
1200