s4-param: Use a unique header name
[Samba.git] / buildtools / wafadmin / Task.py
blob5cda2ec7301f592f3b5afa0c593eb2f9118077a5
1 #!/usr/bin/env python
2 # encoding: utf-8
3 # Thomas Nagy, 2005-2008 (ita)
5 """
6 Running tasks in parallel is a simple problem, but in practice it is more complicated:
7 * dependencies discovered during the build (dynamic task creation)
8 * dependencies discovered after files are compiled
9 * the amount of tasks and dependencies (graph size) can be huge
11 This is why the dependency management is split on three different levels:
12 1. groups of tasks that run all after another group of tasks
13 2. groups of tasks that can be run in parallel
14 3. tasks that can run in parallel, but with possible unknown ad-hoc dependencies
16 The point #1 represents a strict sequential order between groups of tasks, for example a compiler is produced
17 and used to compile the rest, whereas #2 and #3 represent partial order constraints where #2 applies to the kind of task
18 and #3 applies to the task instances.
20 #1 is held by the task manager: ordered list of TaskGroups (see bld.add_group)
21 #2 is held by the task groups and the task types: precedence after/before (topological sort),
22 and the constraints extracted from file extensions
23 #3 is held by the tasks individually (attribute run_after),
24 and the scheduler (Runner.py) uses Task::runnable_status to reorder the tasks
28 To try, use something like this in your code:
29 import Constants, Task
30 Task.algotype = Constants.MAXPARALLEL
34 There are two concepts with the tasks (individual units of change):
35 * dependency (if 1 is recompiled, recompile 2)
36 * order (run 2 after 1)
38 example 1: if t1 depends on t2 and t2 depends on t3 it is not necessary to make t1 depend on t3 (dependency is transitive)
39 example 2: if t1 depends on a node produced by t2, it is not immediately obvious that t1 must run after t2 (order is not obvious)
41 The role of the Task Manager is to give the tasks in order (groups of tasks that may be run in parallel one after the other)
43 """
45 import os, shutil, sys, re, random, datetime, tempfile, shlex
46 from Utils import md5
47 import Build, Runner, Utils, Node, Logs, Options
48 from Logs import debug, warn, error
49 from Constants import *
# Scheduling algorithm read by TaskGroup.get_next_set: one of the constants
# NORMAL, JOBCONTROL or MAXPARALLEL
#algotype = JOBCONTROL
#algotype = MAXPARALLEL
algotype = NORMAL
# Source templates for the functions generated by compile_fun_shell and
# compile_fun_noshell. The text is executed by funex(), so the indentation
# inside the strings is significant: it must be made of tabs because the
# generated fragments are joined with "\n\t" / ",\n\t\t". The tabs and
# newlines are spelled as escapes so that they cannot be lost or converted.

# Filled with a pair (command line, formatting arguments); the second
# conversion is written "% s" (space flag) and receives either '' or a
# string of the form "% (...) "
COMPILE_TEMPLATE_SHELL = (
    "\n"
    "def f(task):\n"
    "\tenv = task.env\n"
    "\twd = getattr(task, 'cwd', None)\n"
    "\tp = env.get_flat\n"
    "\tcmd = ''' %s ''' % s\n"
    "\treturn task.exec_command(cmd, cwd=wd)\n"
)

# Filled with one string: the statements appending the arguments to lst
COMPILE_TEMPLATE_NOSHELL = (
    "\n"
    "def f(task):\n"
    "\tenv = task.env\n"
    "\twd = getattr(task, 'cwd', None)\n"
    "\tdef to_list(xx):\n"
    "\t\tif isinstance(xx, str): return [xx]\n"
    "\t\treturn xx\n"
    "\tlst = []\n"
    "\t%s\n"
    "\tlst = [x for x in lst if x]\n"
    "\treturn task.exec_command(lst, cwd=wd)\n"
)
78 """
79 Enable different kind of dependency algorithms:
80 1 make groups: first compile all cpps and then compile all links (NORMAL)
81 2 parallelize all (each link task run after its dependencies) (MAXPARALLEL)
82 3 like 1 but provide additional constraints for the parallelization (JOBCONTROL)
84 In theory 1. will be faster than 2 for waf, but might be slower for builds
85 The scheme 2 will not allow for running tasks one by one so it can cause disk thrashing on huge builds
86 """
# Hook called by TaskGroup.prepare with the list of tasks of the group.
# Additional dependency pre-check may be added by replacing the function file_deps.
# e.g. extract_outputs, extract_deps below.
file_deps = Utils.nada
class TaskManager(object):
    """The manager is attached to the build object, it holds the ordered list of TaskGroup"""

    def __init__(self):
        self.groups = []
        self.tasks_done = []
        self.current_group = 0
        self.groups_names = {}

    def group_name(self, g):
        """name registered for the group g, given as a TaskGroup or as an index ('' when there is none)"""
        if not isinstance(g, TaskGroup):
            g = self.groups[g]
        for label, candidate in self.groups_names.items():
            if candidate is g:
                return label
        return ''

    def group_idx(self, tg):
        """index of the group holding the task generator tg, None when no group holds it"""
        for pos, grp in enumerate(self.groups):
            for gen in grp.tasks_gen:
                if gen is tg:
                    return pos
        return None

    def get_next_set(self):
        """return the next set of tasks to execute, as given by the current group

        when a group has nothing left, its post functions are run and the next
        group becomes the current one; (None, None) is returned after the last group"""
        while self.current_group < len(self.groups):
            grp = self.groups[self.current_group]
            batch = grp.get_next_set()
            if batch:
                return batch
            grp.process_install()
            self.current_group += 1
        return (None, None)

    def add_group(self, name=None, set=True):
        """append a new TaskGroup, registered under name; make it the current group if 'set' is true"""
        #if self.groups and not self.groups[0].tasks:
        #    error('add_group: an empty group is already present')
        grp = TaskGroup()

        # a duplicate name is only reported, the new group replaces the old entry
        if name and name in self.groups_names:
            error('add_group: name %s already present' % name)
        self.groups_names[name] = grp
        self.groups.append(grp)
        if set:
            self.current_group = len(self.groups) - 1

    def set_group(self, idx):
        """set the current group, given either as a registered name or as an index"""
        if not isinstance(idx, str):
            self.current_group = idx
            return
        wanted = self.groups_names[idx]
        for pos, grp in enumerate(self.groups):
            if grp is wanted:
                self.current_group = pos

    def add_task_gen(self, tgen):
        """add a task generator to the current group (a first group is created if necessary)"""
        if not self.groups:
            self.add_group()
        self.groups[self.current_group].tasks_gen.append(tgen)

    def add_task(self, task):
        """add a task to the current group (a first group is created if necessary)"""
        if not self.groups:
            self.add_group()
        self.groups[self.current_group].tasks.append(task)

    def total(self):
        """amount of tasks held in the 'tasks' lists of all the groups"""
        return sum([len(grp.tasks) for grp in self.groups])

    def add_finished(self, tsk):
        """record a finished task, and install it when the build is an installation"""
        self.tasks_done.append(tsk)
        bld = tsk.generator.bld
        if not bld.is_install:
            return
        if 'install' in tsk.__dict__:
            # an attribute set on the instance replaces the install method
            # install=0 to prevent installation
            hook = tsk.__dict__['install']
            if hook:
                hook(tsk)
        else:
            tsk.install()
class TaskGroup(object):
    "the compilation of one group does not begin until the previous group has finished (in the manager)"

    def __init__(self):
        self.tasks = [] # this list will be consumed
        self.tasks_gen = []

        self.cstr_groups = Utils.DefaultDict(list) # tasks having equivalent constraints
        self.cstr_order = Utils.DefaultDict(set) # partial order between the cstr groups
        self.temp_tasks = [] # tasks put on hold
        self.ready = 0 # set to 1 by prepare()
        self.done = 0 # set to 1 once tasks_with_inner_constraints has returned the tasks
        self.post_funs = []

    def reset(self):
        "clears the state of the object (put back the tasks into self.tasks)"
        for x in self.cstr_groups:
            self.tasks += self.cstr_groups[x]
        self.tasks = self.temp_tasks + self.tasks
        self.temp_tasks = []
        self.cstr_groups = Utils.DefaultDict(list)
        self.cstr_order = Utils.DefaultDict(set)
        self.ready = 0
        # without this, tasks_with_inner_constraints would keep returning None after a reset
        self.done = 0

    def process_install(self):
        "call the functions registered in post_funs as (function, args, kwargs)"
        for (f, k, kw) in self.post_funs:
            f(*k, **kw)

    def prepare(self):
        "prepare the scheduling"
        self.ready = 1
        file_deps(self.tasks)
        self.make_cstr_groups()
        self.extract_constraints()

    def get_next_set(self):
        "next list of tasks to execute using max job settings, returns (maxjobs, task_list), or () when nothing is left"
        global algotype
        if algotype == NORMAL:
            tasks = self.tasks_in_parallel()
            maxj = MAXJOBS
        elif algotype == JOBCONTROL:
            (maxj, tasks) = self.tasks_by_max_jobs()
        elif algotype == MAXPARALLEL:
            tasks = self.tasks_with_inner_constraints()
            maxj = MAXJOBS
        else:
            raise Utils.WafError("unknown algorithm type %s" % (algotype))

        if not tasks: return ()
        return (maxj, tasks)

    def make_cstr_groups(self):
        "unite the tasks that have similar constraints (same value of hash_constraints)"
        self.cstr_groups = Utils.DefaultDict(list)
        for x in self.tasks:
            h = x.hash_constraints()
            self.cstr_groups[h].append(x)

    def set_order(self, a, b):
        "record that the constraint group b comes after the constraint group a"
        self.cstr_order[a].add(b)

    def compare_exts(self, t1, t2):
        """extension production

        -1 if t1 takes as input an extension produced by t2, 1 in the opposite case, else 0"""
        x = "ext_in"
        y = "ext_out"
        in_ = t1.attr(x, ())
        out_ = t2.attr(y, ())
        for k in in_:
            if k in out_:
                return -1
        in_ = t2.attr(x, ())
        out_ = t1.attr(y, ())
        for k in in_:
            if k in out_:
                return 1
        return 0

    def compare_partial(self, t1, t2):
        """partial relations after/before

        -1 if t1 is declared after t2 (or t2 before t1), 1 in the opposite case, else 0"""
        m = "after"
        n = "before"
        name = t2.__class__.__name__
        if name in Utils.to_list(t1.attr(m, ())): return -1
        elif name in Utils.to_list(t1.attr(n, ())): return 1
        name = t1.__class__.__name__
        if name in Utils.to_list(t2.attr(m, ())): return 1
        elif name in Utils.to_list(t2.attr(n, ())): return -1
        return 0

    def extract_constraints(self):
        "extract the parallelization constraints from the tasks with different constraints"
        # a real list is needed for the indexing below
        keys = list(self.cstr_groups.keys())
        count = len(keys)
        # hopefully the length of this list is short
        for i in range(count):
            # the first task of a group stands for the whole group
            t1 = self.cstr_groups[keys[i]][0]
            for j in range(i + 1, count):
                t2 = self.cstr_groups[keys[j]][0]

                # add the constraints based on the comparisons
                val = (self.compare_exts(t1, t2)
                    or self.compare_partial(t1, t2)
                    )
                if val > 0:
                    self.set_order(keys[i], keys[j])
                elif val < 0:
                    self.set_order(keys[j], keys[i])

    def tasks_in_parallel(self):
        "(NORMAL) next list of tasks that may be executed in parallel"

        if not self.ready: self.prepare()

        keys = list(self.cstr_groups.keys())

        unconnected = [] # groups which do not have to wait for another group
        remainder = []

        for u in keys:
            for k in self.cstr_order.values():
                if u in k:
                    remainder.append(u)
                    break
            else:
                unconnected.append(u)

        toreturn = []
        for y in unconnected:
            toreturn.extend(self.cstr_groups[y])

        # remove stuff only after
        for y in unconnected:
            try: self.cstr_order.__delitem__(y)
            except KeyError: pass
            self.cstr_groups.__delitem__(y)

        if not toreturn and remainder:
            raise Utils.WafError("circular order constraint detected %r" % remainder)

        return toreturn

    def tasks_by_max_jobs(self):
        "(JOBCONTROL) returns the tasks that can run in parallel with the max amount of jobs"
        if not self.ready: self.prepare()
        if not self.temp_tasks: self.temp_tasks = self.tasks_in_parallel()
        if not self.temp_tasks: return (None, None)

        # keep the tasks having the smallest maxjobs value, put the others on hold
        maxjobs = MAXJOBS
        ret = []
        remaining = []
        for t in self.temp_tasks:
            m = getattr(t, "maxjobs", getattr(self.__class__, "maxjobs", MAXJOBS))
            if m > maxjobs:
                remaining.append(t)
            elif m < maxjobs:
                remaining += ret
                ret = [t]
                maxjobs = m
            else:
                ret.append(t)
        self.temp_tasks = remaining
        return (maxjobs, ret)

    def tasks_with_inner_constraints(self):
        """(MAXPARALLEL) returns all tasks in this group, but add the constraints on each task instance
        as an optimization, it might be desirable to discard the tasks which do not have to run"""
        if not self.ready: self.prepare()

        # the tasks are returned only once (until the next reset)
        if getattr(self, "done", None): return None

        for p in self.cstr_order:
            for v in self.cstr_order[p]:
                for m in self.cstr_groups[p]:
                    for n in self.cstr_groups[v]:
                        n.set_run_after(m)
        self.cstr_order = Utils.DefaultDict(set)
        self.cstr_groups = Utils.DefaultDict(list)
        self.done = 1
        return self.tasks[:] # make a copy
class store_task_type(type):
    "metaclass registering the task classes into TaskBase.classes, the suffix _task being removed from the names"
    def __init__(cls, name, bases, dict):
        super(store_task_type, cls).__init__(name, bases, dict)
        key = cls.__name__

        if key.endswith('_task'):
            key = key.replace('_task', '')
        # TaskBase cannot be registered, the name is not bound yet while its class is created
        if key == 'TaskBase':
            return
        TaskBase.classes[key] = cls
class TaskBase(object):
    """Base class for all Waf tasks

    The most important methods are (by usual order of call):
    1 runnable_status: ask the task if it should be run, skipped, or if we have to ask later
    2 __str__: string to display to the user
    3 run: execute the task
    4 post_run: after the task is run, update the cache about the task

    This class should be seen as an interface, it provides the very minimum necessary for the scheduler
    so it does not do much.

    For illustration purposes, TaskBase instances try to execute self.fun (if provided)
    """

    __metaclass__ = store_task_type

    color = "GREEN"
    maxjobs = MAXJOBS
    classes = {}
    stat = None

    def __init__(self, *k, **kw):
        """the keyword 'generator' gives the owner of the task (else the task is its own generator),
        and the task is added to the task manager unless the keyword 'normal' is false"""
        self.hasrun = NOT_RUN

        if 'generator' in kw:
            self.generator = kw['generator']
        else:
            self.generator = self
            self.bld = Build.bld

        if kw.get('normal', 1):
            self.generator.bld.task_manager.add_task(self)

    def __repr__(self):
        "used for debugging"
        fun = getattr(self, "fun", "")
        return '\n\t{task: %s %s}' % (self.__class__.__name__, str(fun))

    def __str__(self):
        "string to display to the user"
        if not hasattr(self, 'fun'):
            return self.__class__.__name__ + '\n'
        return 'executing: %s\n' % self.fun.__name__

    def exec_command(self, *k, **kw):
        "use this for executing commands from tasks"
        # TODO in waf 1.6, eliminate bld.exec_command, and move the cwd processing to here
        process_env = self.env['env']
        if process_env:
            kw['env'] = process_env
        return self.generator.bld.exec_command(*k, **kw)

    def runnable_status(self):
        "RUN_ME SKIP_ME or ASK_LATER"
        return RUN_ME

    def can_retrieve_cache(self):
        "no cache at this level"
        return False

    def call_run(self):
        "run the task, unless can_retrieve_cache succeeds (0 is returned in this case)"
        if not self.can_retrieve_cache():
            return self.run()
        return 0

    def run(self):
        "called if the task must run"
        if not hasattr(self, 'fun'):
            return 0
        return self.fun(self)

    def post_run(self):
        "update the dependency tree (node stats)"
        pass

    def display(self):
        "print either the description (using __str__) or the progress bar or the ide output"
        col1 = Logs.colors(self.color)
        col2 = Logs.colors.NORMAL
        mode = Options.options.progress_bar

        if mode == 1:
            return self.generator.bld.progress_line(self.position[0], self.position[1], col1, col2)

        if mode == 2:
            ela = Utils.get_elapsed_time(self.generator.bld.ini)
            # the tasks of this class may not have inputs or outputs
            try:
                ins = ','.join([n.name for n in self.inputs])
            except AttributeError:
                ins = ''
            try:
                outs = ','.join([n.name for n in self.outputs])
            except AttributeError:
                outs = ''
            return '|Total %s|Current %s|Inputs %s|Outputs %s|Time %s|\n' % (self.position[1], self.position[0], ins, outs, ela)

        # pad the counters to the width of the total
        width = len(str(self.position[1]))
        fs = '[%%%dd/%%%dd] %%s%%s%%s' % (width, width)
        return fs % (self.position[0], self.position[1], col1, str(self), col2)

    def attr(self, att, default=None):
        "retrieve an attribute from the instance or from the class (microoptimization here)"
        # the instance itself is the marker for a missing attribute
        found = getattr(self, att, self)
        if found is not self:
            return found
        return getattr(self.__class__, att, default)

    def hash_constraints(self):
        "identify a task type for all the constraints relevant for the scheduler: precedence, file production"
        a = self.attr
        parts = (self.__class__.__name__,
            str(a('before', '')),
            str(a('after', '')),
            str(a('ext_in', '')),
            str(a('ext_out', '')),
            self.__class__.maxjobs)
        return hash(parts)

    def format_error(self):
        "error message to display to the user (when a build fails)"
        if getattr(self, "err_msg", None):
            return self.err_msg
        if self.hasrun == CRASHED:
            try:
                return " -> task failed (err #%d): %r" % (self.err_code, self)
            except AttributeError:
                return " -> task failed: %r" % self
        if self.hasrun == MISSING:
            return " -> missing files: %r" % self
        return ''

    def install(self):
        """
        installation is performed by looking at the task attributes:
        * install_path: installation path like "${PREFIX}/bin"
        * filename: install the first node in the outputs as a file with a particular name, be certain to give os.sep
        * chmod: permissions
        """
        bld = self.generator.bld
        # lookup kept from the original code, the value is not used
        unused = self.attr('install')

        if not self.attr('install_path'):
            return

        lst = [a.relpath_gen(bld.srcnode) for a in self.outputs]
        perm = self.attr('chmod', O644)
        if self.attr('src'):
            # if src is given, install the sources too
            lst += [a.relpath_gen(bld.srcnode) for a in self.inputs]
        if self.attr('filename'):
            dest = self.install_path.rstrip(os.sep) + os.sep + self.attr('filename')
            bld.install_as(dest, lst[0], self.env, perm)
        else:
            bld.install_files(self.install_path, lst, self.env, perm)
class Task(TaskBase):
    """The parent class is quite limited, in this version:
    * file system interaction: input and output nodes
    * persistence: do not re-execute tasks that have already run
    * caching: same files can be saved and retrieved from a cache directory
    * dependencies:
        implicit, like .c files depending on .h files
        explicit, like the input nodes or the dep_nodes
        environment variables, like the CXXFLAGS in self.env
    """
    # names of the environment variables hashed into the signature (see sig_vars)
    vars = []

    def __init__(self, env, **kw):
        "env is the environment of the task, the keywords are given to TaskBase.__init__"
        TaskBase.__init__(self, **kw)
        self.env = env

        # inputs and outputs are nodes
        # use setters when possible
        self.inputs = []
        self.outputs = []

        self.dep_nodes = []
        self.run_after = []

        # Additionally, you may define the following
        #self.dep_vars = 'PREFIX DATADIR'

    def __str__(self):
        "string to display to the user"
        env = self.env
        src_str = ' '.join([a.nice_path(env) for a in self.inputs])
        tgt_str = ' '.join([a.nice_path(env) for a in self.outputs])
        if self.outputs: sep = ' -> '
        else: sep = ''
        return '%s: %s%s%s\n' % (self.__class__.__name__.replace('_task', ''), src_str, sep, tgt_str)

    def __repr__(self):
        "used for debugging"
        return "".join(['\n\t{task: ', self.__class__.__name__, " ", ",".join([x.name for x in self.inputs]), " -> ", ",".join([x.name for x in self.outputs]), '}'])

    def unique_id(self):
        "get a unique id: hash the node paths, the variant, the class, the function"
        try:
            return self.uid
        except AttributeError:
            # this is not a real hot zone, but we want to avoid surprises here
            m = md5()
            up = m.update
            up(self.__class__.__name__)
            up(self.env.variant())
            p = None
            for x in self.inputs + self.outputs:
                # the folder path is hashed only when it differs from the one of the previous node
                if p != x.parent.id:
                    p = x.parent.id
                    up(x.parent.abspath())
                up(x.name)
            self.uid = m.digest()
            return self.uid

    def set_inputs(self, inp):
        "add one node or a list of nodes to the inputs"
        if isinstance(inp, list): self.inputs += inp
        else: self.inputs.append(inp)

    def set_outputs(self, out):
        "add one node or a list of nodes to the outputs"
        if isinstance(out, list): self.outputs += out
        else: self.outputs.append(out)

    def set_run_after(self, task):
        "set (scheduler) order on another task"
        # TODO: handle list or object
        assert isinstance(task, TaskBase)
        self.run_after.append(task)

    def add_file_dependency(self, filename):
        "TODO user-provided file dependencies"
        node = self.generator.bld.path.find_resource(filename)
        self.dep_nodes.append(node)

    def signature(self):
        """signature of the task: hash of the explicit dependencies, of the environment
        variables and of the implicit dependencies (when a scanner is provided)

        the result and its details are stored in self.cache_sig"""
        # compute the result one time, and suppose the scan_signature will give the good result
        try: return self.cache_sig[0]
        except AttributeError: pass

        self.m = md5()

        # explicit deps
        exp_sig = self.sig_explicit_deps()

        # env vars
        var_sig = self.sig_vars()

        # implicit deps
        imp_sig = SIG_NIL
        if self.scan:
            try:
                imp_sig = self.sig_implicit_deps()
            except ValueError:
                # sig_implicit_deps dropped the stored signatures and asks for a rescan: start again
                return self.signature()

        # we now have the signature (first element) and the details (for debugging)
        ret = self.m.digest()
        self.cache_sig = (ret, exp_sig, imp_sig, var_sig)
        return ret

    def runnable_status(self):
        "SKIP_ME RUN_ME or ASK_LATER"
        #return 0 # benchmarking

        if self.inputs and (not self.outputs):
            if not getattr(self.__class__, 'quiet', None):
                warn("invalid task (no inputs OR outputs): override in a Task subclass or set the attribute 'quiet' %r" % self)

        # wait for the tasks that must run before this one
        for t in self.run_after:
            if not t.hasrun:
                return ASK_LATER

        env = self.env
        bld = self.generator.bld

        # first compute the signature
        new_sig = self.signature()

        # compare the signature to a signature computed previously
        key = self.unique_id()
        try:
            prev_sig = bld.task_sigs[key][0]
        except KeyError:
            debug("task: task %r must run as it was never run before or the task code changed", self)
            return RUN_ME

        # compare the signatures of the outputs
        for node in self.outputs:
            variant = node.variant(env)
            try:
                if bld.node_sigs[variant][node.id] != new_sig:
                    return RUN_ME
            except KeyError:
                debug("task: task %r must run as the output nodes do not exist", self)
                return RUN_ME

        # debug if asked to
        if Logs.verbose: self.debug_why(bld.task_sigs[key])

        if new_sig != prev_sig:
            return RUN_ME
        return SKIP_ME

    def post_run(self):
        """called after a successful task run

        checks that the outputs exist, stores the signatures for the next run and,
        if the global cache is enabled, copies the outputs into it"""
        bld = self.generator.bld
        env = self.env
        sig = self.signature()
        ssig = sig.encode('hex')

        variant = env.variant()
        for node in self.outputs:
            # check if the node exists ..
            try:
                os.stat(node.abspath(env))
            except OSError:
                self.hasrun = MISSING
                self.err_msg = '-> missing file: %r' % node.abspath(env)
                raise Utils.WafError

            # important, store the signature for the next run
            bld.node_sigs[variant][node.id] = sig
        bld.task_sigs[self.unique_id()] = self.cache_sig

        # file caching, if possible
        # try to avoid data corruption as much as possible
        if not Options.cache_global or Options.options.nocache or not self.outputs:
            return None

        # the outputs were just restored from the cache, do not write them back
        if getattr(self, 'cached', None):
            return None

        dname = os.path.join(Options.cache_global, ssig)
        tmpdir = tempfile.mkdtemp(prefix=Options.cache_global + os.sep + 'waf')

        # the cache operations below are best-effort: errors are ignored, but
        # KeyboardInterrupt and SystemExit are not swallowed
        try:
            shutil.rmtree(dname)
        except Exception:
            pass

        # fill a temporary folder, then rename it to the final name
        try:
            i = 0
            for node in self.outputs:
                dest = os.path.join(tmpdir, str(i) + node.name)
                shutil.copy2(node.abspath(env), dest)
                i += 1
        except (OSError, IOError):
            try:
                shutil.rmtree(tmpdir)
            except Exception:
                pass
        else:
            try:
                os.rename(tmpdir, dname)
            except OSError:
                try:
                    shutil.rmtree(tmpdir)
                except Exception:
                    pass
            else:
                try:
                    os.chmod(dname, O755)
                except Exception:
                    pass

    def can_retrieve_cache(self):
        """
        Retrieve build nodes from the cache
        update the file timestamps to help cleaning the least used entries from the cache
        additionally, set an attribute 'cached' to avoid re-creating the same cache files

        suppose there are files in cache/dir1/file1 and cache/dir2/file2
        first, read the timestamp of dir1
        then try to copy the files
        then look at the timestamp again, if it has changed, the data may have been corrupt (cache update by another process)
        should an exception occur, ignore the data

        returns 1 when the outputs were restored, else None
        """
        if not Options.cache_global or Options.options.nocache or not self.outputs:
            return None

        env = self.env
        sig = self.signature()
        ssig = sig.encode('hex')

        # first try to access the cache folder for the task
        dname = os.path.join(Options.cache_global, ssig)
        try:
            t1 = os.stat(dname).st_mtime
        except OSError:
            return None

        i = 0
        for node in self.outputs:
            orig = os.path.join(dname, str(i) + node.name)
            try:
                shutil.copy2(orig, node.abspath(env))
                # mark the cache file as used recently (modified)
                os.utime(orig, None)
            except (OSError, IOError):
                debug('task: failed retrieving file')
                return None
            i += 1

        # is it the same folder?
        try:
            t2 = os.stat(dname).st_mtime
        except OSError:
            return None

        if t1 != t2:
            return None

        for node in self.outputs:
            # use the variant of each node (as runnable_status does) instead of
            # the value left over by the last iteration of the copy loop
            variant = node.variant(env)
            self.generator.bld.node_sigs[variant][node.id] = sig
            if Options.options.progress_bar < 1:
                self.generator.bld.printout('restoring from cache %r\n' % node.bldpath(env))

        self.cached = True
        return 1

    def debug_why(self, old_sigs):
        "explains why a task is run"

        new_sigs = self.cache_sig
        def v(x):
            return x.encode('hex')

        debug("Task %r", self)
        # same order as the elements of cache_sig
        msgs = ['Task must run', '* Source file or manual dependency', '* Implicit dependency', '* Environment variable']
        tmp = 'task: -> %s: %s %s'
        for x in range(len(msgs)):
            if (new_sigs[x] != old_sigs[x]):
                debug(tmp, msgs[x], v(old_sigs[x]), v(new_sigs[x]))

    def sig_explicit_deps(self):
        "hash the signatures of the inputs, of the dep_nodes and of the manual dependencies (bld.deps_man)"
        bld = self.generator.bld
        up = self.m.update

        # the inputs
        for x in self.inputs + getattr(self, 'dep_nodes', []):
            if not x.parent.id in bld.cache_scanned_folders:
                bld.rescan(x.parent)

            variant = x.variant(self.env)
            try:
                up(bld.node_sigs[variant][x.id])
            except KeyError:
                raise Utils.WafError('Missing node signature for %r (required by %r)' % (x, self))

        # manual dependencies, they can slow down the builds
        if bld.deps_man:
            additional_deps = bld.deps_man
            for x in self.inputs + self.outputs:
                try:
                    d = additional_deps[x.id]
                except KeyError:
                    continue

                for v in d:
                    if isinstance(v, Node.Node):
                        bld.rescan(v.parent)
                        variant = v.variant(self.env)
                        try:
                            v = bld.node_sigs[variant][v.id]
                        except KeyError:
                            raise Utils.WafError('Missing node signature for %r (required by %r)' % (v, self))
                    elif hasattr(v, '__call__'):
                        v = v() # dependency is a function, call it
                    up(v)

        # NOTE: the dep_nodes are hashed a second time here; this is kept as it
        # is because removing it would change all the signatures already stored
        for x in self.dep_nodes:
            v = bld.node_sigs[x.variant(self.env)][x.id]
            up(v)

        return self.m.digest()

    def sig_vars(self):
        "hash the environment variables listed in the class attribute vars, and in dep_vars if provided"
        bld = self.generator.bld
        env = self.env

        # dependencies on the environment vars
        act_sig = bld.hash_env_vars(env, self.__class__.vars)
        self.m.update(act_sig)

        # additional variable dependencies, if provided
        dep_vars = getattr(self, 'dep_vars', None)
        if dep_vars:
            self.m.update(bld.hash_env_vars(env, dep_vars))

        return self.m.digest()

    #def scan(self, node):
    #    """this method returns a tuple containing:
    #    * a list of nodes corresponding to real files
    #    * a list of names for files not found in path_lst
    #    the input parameters may have more parameters that the ones used below
    #    """
    #    return ((), ())
    scan = None

    # compute the signature, recompute it if there is no match in the cache
    def sig_implicit_deps(self):
        """the signature obtained may not be the one if the files have changed, we do it in two steps

        raises ValueError when the stored signature does not match anymore: the
        entry in bld.task_sigs is removed and the caller has to start again"""

        bld = self.generator.bld

        # get the task signatures from previous runs
        key = self.unique_id()
        prev_sigs = bld.task_sigs.get(key, ())
        if prev_sigs:
            try:
                # for issue #379
                if prev_sigs[2] == self.compute_sig_implicit_deps():
                    return prev_sigs[2]
            except (KeyError, OSError):
                pass
            del bld.task_sigs[key]
            raise ValueError('rescan')

        # no previous run or the signature of the dependencies has changed, rescan the dependencies
        (nodes, names) = self.scan()
        if Logs.verbose:
            debug('deps: scanner for %s returned %s %s', str(self), str(nodes), str(names))

        # store the dependencies in the cache
        bld.node_deps[key] = nodes
        bld.raw_deps[key] = names

        # recompute the signature and return it
        try:
            sig = self.compute_sig_implicit_deps()
        except KeyError:
            # list the nodes without signature for the error message
            try:
                nodes = []
                for k in bld.node_deps.get(self.unique_id(), []):
                    if k.id & 3 == 2: # Node.FILE:
                        if not k.id in bld.node_sigs[0]:
                            nodes.append(k)
                    else:
                        if not k.id in bld.node_sigs[self.env.variant()]:
                            nodes.append(k)
            except Exception:
                nodes = '?'
            raise Utils.WafError('Missing node signature for %r (for implicit dependencies %r)' % (nodes, self))

        return sig

    def compute_sig_implicit_deps(self):
        """it is intended for .cpp and inferred .h files
        there is a single list (no tree traversal)
        this is the hot spot so ... do not touch"""
        upd = self.m.update

        bld = self.generator.bld
        tstamp = bld.node_sigs
        env = self.env

        for k in bld.node_deps.get(self.unique_id(), []):
            # unlikely but necessary if it happens
            if not k.parent.id in bld.cache_scanned_folders:
                # if the parent folder is removed, an OSError may be thrown
                bld.rescan(k.parent)

            # if the parent folder is removed, a KeyError will be thrown
            if k.id & 3 == 2: # Node.FILE:
                upd(tstamp[0][k.id])
            else:
                upd(tstamp[env.variant()][k.id])

        return self.m.digest()
def funex(c):
    "execute the code c in a new namespace and return the object named f which it defines"
    namespace = {}
    exec(c, namespace)
    return namespace['f']
# Matches the special sequences of a rule string, one alternative per line:
# a backslash, an escaped dollar ($$), or a substitution ${VAR...} where 'var'
# is the variable name and 'code' the text following it up to the closing brace
reg_act = re.compile(
    r"(?P<backslash>\\)|"
    r"(?P<dollar>\$\$)|"
    r"(?P<subst>\$\{(?P<var>\w+)(?P<code>.*?)\})",
    re.M)
def compile_fun_shell(name, line):
    """Compiles a string (once) into a function running the command through the shell, eg:
    simple_task_type('c++', '${CXX} -o ${TGT[0]} ${SRC} -I ${SRC[0].parent.bldpath()}')

    The env variables (CXX, ..) on the task must not hold dicts (order)
    The reserved keywords TGT and SRC represent the task input and output nodes

    Returns the pair (function, list of the env variable names used)

    quick test:
    bld(source='wscript', rule='echo "foo\\${SRC[0].name}\\bar"')
    """

    # pairs (variable, code following the variable) found in the line
    found = []
    def repl(match):
        g = match.group
        if g('dollar'):
            return "$"
        elif g('backslash'):
            return '\\\\'
        elif g('subst'):
            found.append((g('var'), g('code')))
            return "%s"
        return None

    line = reg_act.sub(repl, line) or line

    # one python expression for each %s left in the line
    exprs = []
    dvars = []
    for (var, meth) in found:
        if var == 'SRC':
            if meth:
                exprs.append('task.inputs%s' % meth)
            else:
                exprs.append('" ".join([a.srcpath(env) for a in task.inputs])')
        elif var == 'TGT':
            if meth:
                exprs.append('task.outputs%s' % meth)
            else:
                exprs.append('" ".join([a.bldpath(env) for a in task.outputs])')
        else:
            if not var in dvars:
                dvars.append(var)
            exprs.append("p('%s')" % var)

    if exprs:
        parm = "%% (%s) " % (',\n\t\t'.join(exprs))
    else:
        parm = ''

    c = COMPILE_TEMPLATE_SHELL % (line, parm)

    debug('action: %s', c)
    return (funex(c), dvars)
def compile_fun_noshell(name, line):
    """Compiles a string (once) into a function running the command without a shell:
    the command is given to exec_command as a list of arguments

    Returns the pair (function, list of the env variable names used)
    """

    marker = "<<|@|>>"
    # pairs (variable, code following the variable) found in the line
    found = []
    def repl(match):
        g = match.group
        if g('dollar'):
            return "$"
        elif g('subst'):
            found.append((g('var'), g('code')))
            return marker
        return None

    # the text between the substitutions
    params = reg_act.sub(repl, line).split(marker)

    buf = []
    dvars = []
    for idx, (var, meth) in enumerate(found):
        # literal arguments preceding the substitution
        params[idx] = params[idx].strip()
        if params[idx]:
            buf.append("lst.extend(%r)" % params[idx].split())

        if var == 'SRC':
            if meth:
                buf.append('lst.append(task.inputs%s)' % meth)
            else:
                buf.append("lst.extend([a.srcpath(env) for a in task.inputs])")
        elif var == 'TGT':
            if meth:
                buf.append('lst.append(task.outputs%s)' % meth)
            else:
                buf.append("lst.extend([a.bldpath(env) for a in task.outputs])")
        else:
            buf.append('lst.extend(to_list(env[%r]))' % var)
            if not var in dvars:
                dvars.append(var)

    # literal arguments following the last substitution
    if params[-1]:
        buf.append("lst.extend(%r)" % shlex.split(params[-1]))

    fun = COMPILE_TEMPLATE_NOSHELL % "\n\t".join(buf)
    debug('action: %s', fun)
    return (funex(fun), dvars)
def compile_fun(name, line, shell=None):
    """commands can be launched by the shell or not

    name: name of the task type, given to the compilation functions
    line: rule string to compile
    shell: True to use the shell, False to avoid it, None to use the platform
    default (no shell on win32); the shell is always used when the line
    contains a redirection or '&&'

    Returns the pair given by compile_fun_shell or compile_fun_noshell
    """
    # the operators are looked up anywhere in the line, including the first column
    for operator in ('<', '>', '&&'):
        if operator in line:
            shell = True
            break
    #else:
    #    shell = False

    if shell is None:
        if sys.platform == 'win32':
            shell = False
        else:
            shell = True

    if shell:
        return compile_fun_shell(name, line)
    else:
        return compile_fun_noshell(name, line)
def simple_task_type(name, line, color='GREEN', vars=[], ext_in=[], ext_out=[], before=[], after=[], shell=None):
    """return a new Task subclass with the function run compiled from the line given

    when vars is empty, the variables found in the line are used instead"""
    compiled = compile_fun(name, line, shell)
    fun = compiled[0]
    found_vars = compiled[1]
    # keep the rule on the function
    fun.code = line
    return task_type_from_func(name, fun, vars or found_vars, color, ext_in, ext_out, before, after)
def task_type_from_func(name, func, vars=[], color='GREEN', ext_in=[], ext_out=[], before=[], after=[]):
    """return a new Task subclass named 'name', with func as its method run

    the class is also stored in TaskBase.classes under that name"""
    attrs = {}
    attrs['run'] = func
    attrs['vars'] = vars
    attrs['color'] = color
    attrs['name'] = name
    attrs['ext_in'] = Utils.to_list(ext_in)
    attrs['ext_out'] = Utils.to_list(ext_out)
    attrs['before'] = Utils.to_list(before)
    attrs['after'] = Utils.to_list(after)

    # create the class with the metaclass of Task
    metaclass = type(Task)
    cls = metaclass(name, (Task,), attrs)
    TaskBase.classes[name] = cls
    return cls
def always_run(cls):
    """Set all task instances of this class to be executed whenever a build is started
    The task signature is calculated, but the result of the comparation between
    task signatures is bypassed

    The class is modified in place and returned, so that the function may also
    be used as a class decorator
    """
    old = cls.runnable_status
    def always(self):
        ret = old(self)
        # only SKIP_ME is replaced, the other values (ASK_LATER) are kept
        if ret == SKIP_ME:
            return RUN_ME
        return ret
    cls.runnable_status = always
    return cls
def update_outputs(cls):
    """When a command is always run, it is possible that the output only changes
    sometimes. By default the build nodes have as a hash the signature of the task,
    which may not change. With this, the output nodes (produced) are hashed,
    and the hashes are set to the build nodes

    This may avoid unnecessary recompilations, but it uses more resources
    (hashing the output files) so it is not used by default

    Both post_run and runnable_status of the class are wrapped. Returns `cls`,
    so the function can also be used as a class decorator.
    """
    old_post_run = cls.post_run

    def post_run(self):
        old_post_run(self)
        bld = self.generator.bld
        for output in self.outputs:
            # replace the node signature by the value Utils.h_file computes for the file
            bld.node_sigs[self.env.variant()][output.id] = Utils.h_file(output.abspath(self.env))
            # remember which task produced this node
            bld.task_sigs[output.id] = self.unique_id()
    cls.post_run = post_run

    old_runnable_status = cls.runnable_status

    def runnable_status(self):
        status = old_runnable_status(self)
        if status != RUN_ME:
            return status

        # The task was asked to run: skip it anyway when its signature is
        # unchanged and all its outputs are still recorded for this task.
        uid = self.unique_id()
        try:
            # raises IndexError when there are no outputs
            bld = self.outputs[0].__class__.bld
            new_sig = self.signature()
            prev_sig = bld.task_sigs[uid][0]
            if prev_sig == new_sig:
                node_sigs = bld.node_sigs[self.env.variant()]
                for x in self.outputs:
                    if x.id not in node_sigs:
                        return RUN_ME
                    if bld.task_sigs[x.id] != uid: # ensure the outputs are associated with *this* task
                        return RUN_ME
                return SKIP_ME
        except (KeyError, IndexError):
            # missing records or no outputs: fall through and run the task
            pass
        return RUN_ME
    cls.runnable_status = runnable_status
    return cls
def extract_outputs(tasks):
    """file_deps: Infer additional dependencies from task input and output nodes

    For each variant (x.env.variant()), every task that has a node among its
    inputs is set to run after every task that has the same node (same id)
    among its outputs. Tasks without an `inputs` or `outputs` attribute are
    treated as having none. The tasks are modified in place through
    set_run_after; nothing is returned.
    """
    # variant -> (node id -> consumer tasks, node id -> producer tasks)
    per_variant = {}
    for x in tasks:
        variant = x.env.variant()
        try:
            (ins, outs) = per_variant[variant]
        except KeyError:
            ins = {}
            outs = {}
            per_variant[variant] = (ins, outs)

        for a in getattr(x, 'inputs', []):
            ins.setdefault(a.id, []).append(x)
        for a in getattr(x, 'outputs', []):
            outs.setdefault(a.id, []).append(x)

    for (ins, outs) in per_variant.values():
        # node ids that are both consumed and produced within this variant
        # (iterating a dict yields its keys, no need for iterkeys)
        links = set(ins).intersection(outs)
        for k in links:
            for a in ins[k]:
                for b in outs[k]:
                    # a task reading and writing the same node must not wait for itself
                    if a is not b:
                        a.set_run_after(b)
def extract_deps(tasks):
    """file_deps: Infer additional dependencies from task input and output nodes and from implicit dependencies
    returned by the scanners - that will only work if all tasks are created

    this is aimed at people who have pathological builds and who do not care enough
    to implement the build dependencies properly

    with two loops over the list of tasks, do not expect this to be really fast

    The tasks are modified in place through set_run_after; nothing is returned.
    """

    # first reuse the function above
    extract_outputs(tasks)

    # map the output nodes to the tasks producing them
    out_to_task = {}
    for x in tasks:
        v = x.env.variant()
        for node in getattr(x, 'outputs', []):
            out_to_task[(v, node.id)] = x

    # map the dependencies found to the tasks compiled
    dep_to_task = {}
    for x in tasks:
        try:
            # called for its side effects only (presumably it fills bld.node_deps
            # - TODO confirm); failures are ignored on purpose, but interpreter
            # exits and Ctrl-C are no longer swallowed
            x.signature()
        except Exception:
            pass

        v = x.env.variant()
        key = x.unique_id()
        for k in x.generator.bld.node_deps.get(key, []):
            dep_to_task.setdefault((v, k.id), []).append(x)

    # now get the intersection
    deps = set(dep_to_task).intersection(out_to_task)

    # and add the dependencies from task to task
    for idx in deps:
        producer = out_to_task[idx]
        for k in dep_to_task[idx]:
            # a task depending on its own output must not wait for itself
            if k is not producer:
                k.set_run_after(producer)

    # cleanup, remove the signatures
    for x in tasks:
        try:
            delattr(x, 'cache_sig')
        except AttributeError:
            pass