3 # Thomas Nagy, 2005-2008 (ita)
6 Running tasks in parallel is a simple problem, but in practice it is more complicated:
7 * dependencies discovered during the build (dynamic task creation)
8 * dependencies discovered after files are compiled
9 * the number of tasks and dependencies (graph size) can be huge
11 This is why the dependency management is split on three different levels:
12 1. groups of tasks that all run after another group of tasks
13 2. groups of tasks that can be run in parallel
14 3. tasks that can run in parallel, but with possible unknown ad-hoc dependencies
16 The point #1 represents a strict sequential order between groups of tasks, for example a compiler is produced
17 and used to compile the rest, whereas #2 and #3 represent partial order constraints where #2 applies to the kind of task
18 and #3 applies to the task instances.
20 #1 is held by the task manager: ordered list of TaskGroups (see bld.add_group)
21 #2 is held by the task groups and the task types: precedence after/before (topological sort),
22 and the constraints extracted from file extensions
23 #3 is held by the tasks individually (attribute run_after),
24 and the scheduler (Runner.py) uses Task::runnable_status to reorder the tasks
28 To try, use something like this in your code:
29 import Constants, Task
30 Task.algotype = Constants.MAXPARALLEL
34 There are two concepts with the tasks (individual units of change):
35 * dependency (if 1 is recompiled, recompile 2)
36 * order (run 2 after 1)
38 example 1: if t1 depends on t2 and t2 depends on t3 it is not necessary to make t1 depend on t3 (dependency is transitive)
39 example 2: if t1 depends on a node produced by t2, it is not immediately obvious that t1 must run after t2 (order is not obvious)
41 The role of the Task Manager is to give the tasks in order (groups of task that may be run in parallel one after the other)
45 import os
, shutil
, sys
, re
, random
, datetime
, tempfile
, shlex
47 import Build
, Runner
, Utils
, Node
, Logs
, Options
48 from Logs
import debug
, warn
, error
49 from Constants
import *
52 #algotype = JOBCONTROL
53 #algotype = MAXPARALLEL
55 COMPILE_TEMPLATE_SHELL
= '''
58 wd = getattr(task, 'cwd', None)
60 cmd = \'\'\' %s \'\'\' % s
61 return task.exec_command(cmd, cwd=wd)
64 COMPILE_TEMPLATE_NOSHELL
= '''
67 wd = getattr(task, 'cwd', None)
69 if isinstance(xx, str): return [xx]
73 lst = [x for x in lst if x]
74 return task.exec_command(lst, cwd=wd)
79 Enable different kind of dependency algorithms:
80 1 make groups: first compile all cpps and then compile all links (NORMAL)
81 2 parallelize all (each link task run after its dependencies) (MAXPARALLEL)
82 3 like 1 but provide additional constraints for the parallelization (MAXJOBS)
84 In theory 1. will be faster than 2 for waf, but might be slower for builds
85 The scheme 2 will not allow for running tasks one by one so it can cause disk thrashing on huge builds
88 file_deps
= Utils
.nada
90 Additional dependency pre-check may be added by replacing the function file_deps.
91 e.g. extract_outputs, extract_deps below.
94 class TaskManager(object):
95 """The manager is attached to the build object, it holds a list of TaskGroup"""
99 self
.current_group
= 0
100 self
.groups_names
= {}
102 def group_name(self
, g
):
103 """name for the group g (utility)"""
104 if not isinstance(g
, TaskGroup
):
106 for x
in self
.groups_names
:
107 if id(self
.groups_names
[x
]) == id(g
):
111 def group_idx(self
, tg
):
112 """group the task generator tg is in"""
114 for i
in range(len(self
.groups
)):
116 for t
in g
.tasks_gen
:
121 def get_next_set(self
):
122 """return the next set of tasks to execute
123 the first parameter is the maximum amount of parallelization that may occur"""
125 while not ret
and self
.current_group
< len(self
.groups
):
126 ret
= self
.groups
[self
.current_group
].get_next_set()
129 self
.groups
[self
.current_group
].process_install()
130 self
.current_group
+= 1
133 def add_group(self
, name
=None, set=True):
134 #if self.groups and not self.groups[0].tasks:
135 # error('add_group: an empty group is already present')
138 if name
and name
in self
.groups_names
:
139 error('add_group: name %s already present' % name
)
140 self
.groups_names
[name
] = g
141 self
.groups
.append(g
)
143 self
.current_group
= len(self
.groups
) - 1
145 def set_group(self
, idx
):
146 if isinstance(idx
, str):
147 g
= self
.groups_names
[idx
]
148 for x
in xrange(len(self
.groups
)):
149 if id(g
) == id(self
.groups
[x
]):
150 self
.current_group
= x
152 self
.current_group
= idx
def add_task_gen(self, tgen):
    """Register the task generator tgen in the current group.

    When the manager holds no group yet, one is created first through
    add_group(); tgen is then appended to the tasks_gen list of the group
    selected by self.current_group.
    """
    if not self.groups:
        self.add_group()
    target_group = self.groups[self.current_group]
    target_group.tasks_gen.append(tgen)
def add_task(self, task):
    """Append task to the task list of the current group.

    A first group is created through add_group() when the manager has none;
    the group receiving the task is the one indexed by self.current_group.
    """
    if not self.groups:
        self.add_group()
    target_group = self.groups[self.current_group]
    target_group.tasks.append(task)
164 if not self
.groups
: return 0
165 for group
in self
.groups
:
166 total
+= len(group
.tasks
)
169 def add_finished(self
, tsk
):
170 self
.tasks_done
.append(tsk
)
171 bld
= tsk
.generator
.bld
174 if 'install' in tsk
.__dict
__:
175 f
= tsk
.__dict
__['install']
176 # install=0 to prevent installation
181 class TaskGroup(object):
182 "the compilation of one group does not begin until the previous group has finished (in the manager)"
184 self
.tasks
= [] # this list will be consumed
187 self
.cstr_groups
= Utils
.DefaultDict(list) # tasks having equivalent constraints
188 self
.cstr_order
= Utils
.DefaultDict(set) # partial order between the cstr groups
189 self
.temp_tasks
= [] # tasks put on hold
194 "clears the state of the object (put back the tasks into self.tasks)"
195 for x
in self
.cstr_groups
:
196 self
.tasks
+= self
.cstr_groups
[x
]
197 self
.tasks
= self
.temp_tasks
+ self
.tasks
199 self
.cstr_groups
= Utils
.DefaultDict(list)
200 self
.cstr_order
= Utils
.DefaultDict(set)
203 def process_install(self
):
204 for (f
, k
, kw
) in self
.post_funs
:
208 "prepare the scheduling"
210 file_deps(self
.tasks
)
211 self
.make_cstr_groups()
212 self
.extract_constraints()
214 def get_next_set(self
):
215 "next list of tasks to execute using max job settings, returns (maxjobs, task_list)"
217 if algotype
== NORMAL
:
218 tasks
= self
.tasks_in_parallel()
220 elif algotype
== JOBCONTROL
:
221 (maxj
, tasks
) = self
.tasks_by_max_jobs()
222 elif algotype
== MAXPARALLEL
:
223 tasks
= self
.tasks_with_inner_constraints()
226 raise Utils
.WafError("unknown algorithm type %s" % (algotype
))
228 if not tasks
: return ()
231 def make_cstr_groups(self
):
232 "unite the tasks that have similar constraints"
233 self
.cstr_groups
= Utils
.DefaultDict(list)
235 h
= x
.hash_constraints()
236 self
.cstr_groups
[h
].append(x
)
def set_order(self, a, b):
    """Record an order relation between two constraint-group keys.

    The key b is added to the set stored under key a in self.cstr_order.
    """
    related_keys = self.cstr_order[a]
    related_keys.add(b)
241 def compare_exts(self
, t1
, t2
):
242 "extension production"
246 out_
= t2
.attr(y
, ())
251 out_
= t1
.attr(y
, ())
257 def compare_partial(self
, t1
, t2
):
258 "partial relations after/before"
261 name
= t2
.__class__
.__name
__
262 if name
in Utils
.to_list(t1
.attr(m
, ())): return -1
263 elif name
in Utils
.to_list(t1
.attr(n
, ())): return 1
264 name
= t1
.__class__
.__name
__
265 if name
in Utils
.to_list(t2
.attr(m
, ())): return 1
266 elif name
in Utils
.to_list(t2
.attr(n
, ())): return -1
269 def extract_constraints(self
):
270 "extract the parallelization constraints from the tasks with different constraints"
271 keys
= self
.cstr_groups
.keys()
273 # hopefully the length of this list is short
274 for i
in xrange(max):
275 t1
= self
.cstr_groups
[keys
[i
]][0]
276 for j
in xrange(i
+ 1, max):
277 t2
= self
.cstr_groups
[keys
[j
]][0]
279 # add the constraints based on the comparisons
280 val
= (self
.compare_exts(t1
, t2
)
281 or self
.compare_partial(t1
, t2
)
284 self
.set_order(keys
[i
], keys
[j
])
286 self
.set_order(keys
[j
], keys
[i
])
288 def tasks_in_parallel(self
):
289 "(NORMAL) next list of tasks that may be executed in parallel"
291 if not self
.ready
: self
.prepare()
293 keys
= self
.cstr_groups
.keys()
299 for k
in self
.cstr_order
.values():
304 unconnected
.append(u
)
307 for y
in unconnected
:
308 toreturn
.extend(self
.cstr_groups
[y
])
310 # remove stuff only after
311 for y
in unconnected
:
312 try: self
.cstr_order
.__delitem
__(y
)
313 except KeyError: pass
314 self
.cstr_groups
.__delitem
__(y
)
316 if not toreturn
and remainder
:
317 raise Utils
.WafError("circular order constraint detected %r" % remainder
)
321 def tasks_by_max_jobs(self
):
322 "(JOBCONTROL) returns the tasks that can run in parallel with the max amount of jobs"
323 if not self
.ready
: self
.prepare()
324 if not self
.temp_tasks
: self
.temp_tasks
= self
.tasks_in_parallel()
325 if not self
.temp_tasks
: return (None, None)
330 for t
in self
.temp_tasks
:
331 m
= getattr(t
, "maxjobs", getattr(self
.__class
__, "maxjobs", MAXJOBS
))
340 self
.temp_tasks
= remaining
341 return (maxjobs
, ret
)
343 def tasks_with_inner_constraints(self
):
344 """(MAXPARALLEL) returns all tasks in this group, but add the constraints on each task instance
345 as an optimization, it might be desirable to discard the tasks which do not have to run"""
346 if not self
.ready
: self
.prepare()
348 if getattr(self
, "done", None): return None
350 for p
in self
.cstr_order
:
351 for v
in self
.cstr_order
[p
]:
352 for m
in self
.cstr_groups
[p
]:
353 for n
in self
.cstr_groups
[v
]:
355 self
.cstr_order
= Utils
.DefaultDict(set)
356 self
.cstr_groups
= Utils
.DefaultDict(list)
358 return self
.tasks
[:] # make a copy
360 class store_task_type(type):
361 "store the task types that have a name ending in _task into a map (remember the existing task types)"
362 def __init__(cls
, name
, bases
, dict):
363 super(store_task_type
, cls
).__init
__(name
, bases
, dict)
366 if name
.endswith('_task'):
367 name
= name
.replace('_task', '')
368 if name
!= 'TaskBase':
369 TaskBase
.classes
[name
] = cls
371 class TaskBase(object):
372 """Base class for all Waf tasks
374 The most important methods are (by usual order of call):
375 1 runnable_status: ask the task if it should be run, skipped, or if we have to ask later
376 2 __str__: string to display to the user
377 3 run: execute the task
378 4 post_run: after the task is run, update the cache about the task
380 This class should be seen as an interface, it provides the very minimum necessary for the scheduler
381 so it does not do much.
383 For illustration purposes, TaskBase instances try to execute self.fun (if provided)
386 __metaclass__
= store_task_type
393 def __init__(self
, *k
, **kw
):
394 self
.hasrun
= NOT_RUN
397 self
.generator
= kw
['generator']
399 self
.generator
= self
402 if kw
.get('normal', 1):
403 self
.generator
.bld
.task_manager
.add_task(self
)
407 return '\n\t{task: %s %s}' % (self
.__class
__.__name
__, str(getattr(self
, "fun", "")))
410 "string to display to the user"
411 if hasattr(self
, 'fun'):
412 return 'executing: %s\n' % self
.fun
.__name
__
413 return self
.__class
__.__name
__ + '\n'
415 def exec_command(self
, *k
, **kw
):
416 "use this for executing commands from tasks"
417 # TODO in waf 1.6, eliminate bld.exec_command, and move the cwd processing to here
419 kw
['env'] = self
.env
['env']
420 return self
.generator
.bld
.exec_command(*k
, **kw
)
422 def runnable_status(self
):
423 "RUN_ME SKIP_ME or ASK_LATER"
426 def can_retrieve_cache(self
):
430 if self
.can_retrieve_cache():
435 "called if the task must run"
436 if hasattr(self
, 'fun'):
437 return self
.fun(self
)
441 "update the dependency tree (node stats)"
445 "print either the description (using __str__) or the progress bar or the ide output"
446 col1
= Logs
.colors(self
.color
)
447 col2
= Logs
.colors
.NORMAL
449 if Options
.options
.progress_bar
== 1:
450 return self
.generator
.bld
.progress_line(self
.position
[0], self
.position
[1], col1
, col2
)
452 if Options
.options
.progress_bar
== 2:
453 ela
= Utils
.get_elapsed_time(self
.generator
.bld
.ini
)
455 ins
= ','.join([n
.name
for n
in self
.inputs
])
456 except AttributeError:
459 outs
= ','.join([n
.name
for n
in self
.outputs
])
460 except AttributeError:
462 return '|Total %s|Current %s|Inputs %s|Outputs %s|Time %s|\n' % (self
.position
[1], self
.position
[0], ins
, outs
, ela
)
464 total
= self
.position
[1]
466 fs
= '[%%%dd/%%%dd] %%s%%s%%s' % (n
, n
)
467 return fs
% (self
.position
[0], self
.position
[1], col1
, str(self
), col2
)
469 def attr(self
, att
, default
=None):
470 "retrieve an attribute from the instance or from the class (microoptimization here)"
471 ret
= getattr(self
, att
, self
)
472 if ret
is self
: return getattr(self
.__class
__, att
, default
)
475 def hash_constraints(self
):
476 "identify a task type for all the constraints relevant for the scheduler: precedence, file production"
478 sum = hash((self
.__class
__.__name
__,
479 str(a('before', '')),
481 str(a('ext_in', '')),
482 str(a('ext_out', '')),
483 self
.__class
__.maxjobs
))
486 def format_error(self
):
487 "error message to display to the user (when a build fails)"
488 if getattr(self
, "err_msg", None):
490 elif self
.hasrun
== CRASHED
:
492 return " -> task failed (err #%d): %r" % (self
.err_code
, self
)
493 except AttributeError:
494 return " -> task failed: %r" % self
495 elif self
.hasrun
== MISSING
:
496 return " -> missing files: %r" % self
502 installation is performed by looking at the task attributes:
503 * install_path: installation path like "${PREFIX}/bin"
504 * filename: install the first node in the outputs as a file with a particular name, be certain to give os.sep
507 bld
= self
.generator
.bld
508 d
= self
.attr('install')
510 if self
.attr('install_path'):
511 lst
= [a
.relpath_gen(bld
.srcnode
) for a
in self
.outputs
]
512 perm
= self
.attr('chmod', O644
)
514 # if src is given, install the sources too
515 lst
+= [a
.relpath_gen(bld
.srcnode
) for a
in self
.inputs
]
516 if self
.attr('filename'):
517 dir = self
.install_path
.rstrip(os
.sep
) + os
.sep
+ self
.attr('filename')
518 bld
.install_as(dir, lst
[0], self
.env
, perm
)
520 bld
.install_files(self
.install_path
, lst
, self
.env
, perm
)
522 class Task(TaskBase
):
523 """The parent class is quite limited, in this version:
524 * file system interaction: input and output nodes
525 * persistence: do not re-execute tasks that have already run
526 * caching: same files can be saved and retrieved from a cache directory
528 implicit, like .c files depending on .h files
529 explicit, like the input nodes or the dep_nodes
530 environment variables, like the CXXFLAGS in self.env
533 def __init__(self
, env
, **kw
):
534 TaskBase
.__init
__(self
, **kw
)
537 # inputs and outputs are nodes
538 # use setters when possible
545 # Additionally, you may define the following
546 #self.dep_vars = 'PREFIX DATADIR'
549 "string to display to the user"
551 src_str
= ' '.join([a
.nice_path(env
) for a
in self
.inputs
])
552 tgt_str
= ' '.join([a
.nice_path(env
) for a
in self
.outputs
])
553 if self
.outputs
: sep
= ' -> '
555 return '%s: %s%s%s\n' % (self
.__class
__.__name
__.replace('_task', ''), src_str
, sep
, tgt_str
)
558 return "".join(['\n\t{task: ', self
.__class
__.__name
__, " ", ",".join([x
.name
for x
in self
.inputs
]), " -> ", ",".join([x
.name
for x
in self
.outputs
]), '}'])
561 "get a unique id: hash the node paths, the variant, the class, the function"
564 except AttributeError:
565 "this is not a real hot zone, but we want to avoid surprizes here"
568 up(self
.__class
__.__name
__)
569 up(self
.env
.variant())
571 for x
in self
.inputs
+ self
.outputs
:
574 up(x
.parent
.abspath())
576 self
.uid
= m
.digest()
def set_inputs(self, inp):
    """Add input node(s) to the task.

    A list is merged into self.inputs element by element; any other object
    is appended as a single entry.
    """
    if not isinstance(inp, list):
        self.inputs.append(inp)
        return
    self.inputs += inp
def set_outputs(self, out):
    """Add output node(s) to the task.

    A list is merged into self.outputs element by element; any other object
    is appended as a single entry.
    """
    if not isinstance(out, list):
        self.outputs.append(out)
        return
    self.outputs += out
def set_run_after(self, task):
    """Set a (scheduler) order constraint on another task.

    task may be a single TaskBase instance or a list/tuple of them; each
    task is appended to self.run_after. Passing a single task behaves
    exactly as before.
    """
    if isinstance(task, (list, tuple)):
        # a collection was given: register each entry individually
        for entry in task:
            self.set_run_after(entry)
        return
    assert isinstance(task, TaskBase)
    self.run_after.append(task)
def add_file_dependency(self, filename):
    """Add a user-provided file dependency to the task.

    filename is looked up with find_resource() on the build context's
    current path node, and the node found is appended to self.dep_nodes.

    Raises Utils.WafError when no node is found for filename.
    """
    node = self.generator.bld.path.find_resource(filename)
    if node is None:
        # NOTE(review): find_resource is expected to return None when the
        # file cannot be found -- confirm against Node.py. A None entry in
        # dep_nodes would only fail later, when the signature code reads
        # x.parent.id, so fail here with the offending file name instead.
        raise Utils.WafError('Could not find the file dependency %r (required by %r)' % (filename, self))
    self.dep_nodes.append(node)
599 # compute the result one time, and suppose the scan_signature will give the good result
600 try: return self
.cache_sig
[0]
601 except AttributeError: pass
606 exp_sig
= self
.sig_explicit_deps()
609 var_sig
= self
.sig_vars()
616 imp_sig
= self
.sig_implicit_deps()
618 return self
.signature()
620 # we now have the signature (first element) and the details (for debugging)
621 ret
= self
.m
.digest()
622 self
.cache_sig
= (ret
, exp_sig
, imp_sig
, var_sig
)
625 def runnable_status(self
):
626 "SKIP_ME RUN_ME or ASK_LATER"
627 #return 0 # benchmarking
629 if self
.inputs
and (not self
.outputs
):
630 if not getattr(self
.__class
__, 'quiet', None):
631 warn("invalid task (no inputs OR outputs): override in a Task subclass or set the attribute 'quiet' %r" % self
)
633 for t
in self
.run_after
:
638 bld
= self
.generator
.bld
640 # first compute the signature
641 new_sig
= self
.signature()
643 # compare the signature to a signature computed previously
644 key
= self
.unique_id()
646 prev_sig
= bld
.task_sigs
[key
][0]
648 debug("task: task %r must run as it was never run before or the task code changed", self
)
651 # compare the signatures of the outputs
652 for node
in self
.outputs
:
653 variant
= node
.variant(env
)
655 if bld
.node_sigs
[variant
][node
.id] != new_sig
:
658 debug("task: task %r must run as the output nodes do not exist", self
)
662 if Logs
.verbose
: self
.debug_why(bld
.task_sigs
[key
])
664 if new_sig
!= prev_sig
:
669 "called after a successful task run"
670 bld
= self
.generator
.bld
672 sig
= self
.signature()
673 ssig
= sig
.encode('hex')
675 variant
= env
.variant()
676 for node
in self
.outputs
:
677 # check if the node exists ..
679 os
.stat(node
.abspath(env
))
681 self
.hasrun
= MISSING
682 self
.err_msg
= '-> missing file: %r' % node
.abspath(env
)
685 # important, store the signature for the next run
686 bld
.node_sigs
[variant
][node
.id] = sig
687 bld
.task_sigs
[self
.unique_id()] = self
.cache_sig
689 # file caching, if possible
690 # try to avoid data corruption as much as possible
691 if not Options
.cache_global
or Options
.options
.nocache
or not self
.outputs
:
694 if getattr(self
, 'cached', None):
697 dname
= os
.path
.join(Options
.cache_global
, ssig
)
698 tmpdir
= tempfile
.mkdtemp(prefix
=Options
.cache_global
+ os
.sep
+ 'waf')
707 for node
in self
.outputs
:
708 variant
= node
.variant(env
)
709 dest
= os
.path
.join(tmpdir
, str(i
) + node
.name
)
710 shutil
.copy2(node
.abspath(env
), dest
)
712 except (OSError, IOError):
714 shutil
.rmtree(tmpdir
)
719 os
.rename(tmpdir
, dname
)
722 shutil
.rmtree(tmpdir
)
727 os
.chmod(dname
, O755
)
731 def can_retrieve_cache(self
):
733 Retrieve build nodes from the cache
734 update the file timestamps to help cleaning the least used entries from the cache
735 additionally, set an attribute 'cached' to avoid re-creating the same cache files
737 suppose there are files in cache/dir1/file1 and cache/dir2/file2
738 first, read the timestamp of dir1
739 then try to copy the files
740 then look at the timestamp again, if it has changed, the data may have been corrupt (cache update by another process)
741 should an exception occur, ignore the data
743 if not Options
.cache_global
or Options
.options
.nocache
or not self
.outputs
:
747 sig
= self
.signature()
748 ssig
= sig
.encode('hex')
750 # first try to access the cache folder for the task
751 dname
= os
.path
.join(Options
.cache_global
, ssig
)
753 t1
= os
.stat(dname
).st_mtime
758 for node
in self
.outputs
:
759 variant
= node
.variant(env
)
761 orig
= os
.path
.join(dname
, str(i
) + node
.name
)
763 shutil
.copy2(orig
, node
.abspath(env
))
764 # mark the cache file as used recently (modified)
766 except (OSError, IOError):
767 debug('task: failed retrieving file')
771 # is it the same folder?
773 t2
= os
.stat(dname
).st_mtime
780 for node
in self
.outputs
:
781 self
.generator
.bld
.node_sigs
[variant
][node
.id] = sig
782 if Options
.options
.progress_bar
< 1:
783 self
.generator
.bld
.printout('restoring from cache %r\n' % node
.bldpath(env
))
788 def debug_why(self
, old_sigs
):
789 "explains why a task is run"
791 new_sigs
= self
.cache_sig
793 return x
.encode('hex')
795 debug("Task %r", self
)
796 msgs
= ['Task must run', '* Source file or manual dependency', '* Implicit dependency', '* Environment variable']
797 tmp
= 'task: -> %s: %s %s'
798 for x
in xrange(len(msgs
)):
799 if (new_sigs
[x
] != old_sigs
[x
]):
800 debug(tmp
, msgs
[x
], v(old_sigs
[x
]), v(new_sigs
[x
]))
802 def sig_explicit_deps(self
):
803 bld
= self
.generator
.bld
807 for x
in self
.inputs
+ getattr(self
, 'dep_nodes', []):
808 if not x
.parent
.id in bld
.cache_scanned_folders
:
811 variant
= x
.variant(self
.env
)
813 up(bld
.node_sigs
[variant
][x
.id])
815 raise Utils
.WafError('Missing node signature for %r (required by %r)' % (x
, self
))
817 # manual dependencies, they can slow down the builds
819 additional_deps
= bld
.deps_man
820 for x
in self
.inputs
+ self
.outputs
:
822 d
= additional_deps
[x
.id]
827 if isinstance(v
, Node
.Node
):
829 variant
= v
.variant(self
.env
)
831 v
= bld
.node_sigs
[variant
][v
.id]
833 raise Utils
.WafError('Missing node signature for %r (required by %r)' % (v
, self
))
834 elif hasattr(v
, '__call__'):
835 v
= v() # dependency is a function, call it
838 for x
in self
.dep_nodes
:
839 v
= bld
.node_sigs
[x
.variant(self
.env
)][x
.id]
842 return self
.m
.digest()
845 bld
= self
.generator
.bld
848 # dependencies on the environment vars
849 act_sig
= bld
.hash_env_vars(env
, self
.__class
__.vars)
850 self
.m
.update(act_sig
)
852 # additional variable dependencies, if provided
853 dep_vars
= getattr(self
, 'dep_vars', None)
855 self
.m
.update(bld
.hash_env_vars(env
, dep_vars
))
857 return self
.m
.digest()
859 #def scan(self, node):
860 # """this method returns a tuple containing:
861 # * a list of nodes corresponding to real files
862 # * a list of names for files not found in path_lst
863 # the input parameters may have more parameters that the ones used below
868 # compute the signature, recompute it if there is no match in the cache
869 def sig_implicit_deps(self
):
870 "the signature obtained may not be the one if the files have changed, we do it in two steps"
872 bld
= self
.generator
.bld
874 # get the task signatures from previous runs
875 key
= self
.unique_id()
876 prev_sigs
= bld
.task_sigs
.get(key
, ())
880 if prev_sigs
[2] == self
.compute_sig_implicit_deps():
882 except (KeyError, OSError):
884 del bld
.task_sigs
[key
]
885 raise ValueError('rescan')
887 # no previous run or the signature of the dependencies has changed, rescan the dependencies
888 (nodes
, names
) = self
.scan()
890 debug('deps: scanner for %s returned %s %s', str(self
), str(nodes
), str(names
))
892 # store the dependencies in the cache
893 bld
.node_deps
[key
] = nodes
894 bld
.raw_deps
[key
] = names
896 # recompute the signature and return it
898 sig
= self
.compute_sig_implicit_deps()
902 for k
in bld
.node_deps
.get(self
.unique_id(), []):
903 if k
.id & 3 == 2: # Node.FILE:
904 if not k
.id in bld
.node_sigs
[0]:
907 if not k
.id in bld
.node_sigs
[self
.env
.variant()]:
911 raise Utils
.WafError('Missing node signature for %r (for implicit dependencies %r)' % (nodes
, self
))
915 def compute_sig_implicit_deps(self
):
916 """it is intended for .cpp and inferred .h files
917 there is a single list (no tree traversal)
918 this is the hot spot so ... do not touch"""
921 bld
= self
.generator
.bld
922 tstamp
= bld
.node_sigs
925 for k
in bld
.node_deps
.get(self
.unique_id(), []):
926 # unlikely but necessary if it happens
927 if not k
.parent
.id in bld
.cache_scanned_folders
:
928 # if the parent folder is removed, an OSError may be thrown
931 # if the parent folder is removed, a KeyError will be thrown
932 if k
.id & 3 == 2: # Node.FILE:
935 upd(tstamp
[env
.variant()][k
.id])
937 return self
.m
.digest()
944 reg_act
= re
.compile(r
"(?P<backslash>\\)|(?P<dollar>\$\$)|(?P<subst>\$\{(?P<var>\w+)(?P<code>.*?)\})", re
.M
)
945 def compile_fun_shell(name
, line
):
946 """Compiles a string (once) into a function, eg:
947 simple_task_type('c++', '${CXX} -o ${TGT[0]} ${SRC} -I ${SRC[0].parent.bldpath()}')
949 The env variables (CXX, ..) on the task must not hold dicts (order)
950 The reserved keywords TGT and SRC represent the task input and output nodes
953 bld(source='wscript', rule='echo "foo\\${SRC[0].name}\\bar"')
959 if g('dollar'): return "$"
960 elif g('backslash'): return '\\\\'
961 elif g('subst'): extr
.append((g('var'), g('code'))); return "%s"
964 line
= reg_act
.sub(repl
, line
) or line
969 for (var
, meth
) in extr
:
971 if meth
: app('task.inputs%s' % meth
)
972 else: app('" ".join([a.srcpath(env) for a in task.inputs])')
974 if meth
: app('task.outputs%s' % meth
)
975 else: app('" ".join([a.bldpath(env) for a in task.outputs])')
977 if not var
in dvars
: dvars
.append(var
)
979 if parm
: parm
= "%% (%s) " % (',\n\t\t'.join(parm
))
982 c
= COMPILE_TEMPLATE_SHELL
% (line
, parm
)
984 debug('action: %s', c
)
985 return (funex(c
), dvars
)
987 def compile_fun_noshell(name
, line
):
992 if g('dollar'): return "$"
993 elif g('subst'): extr
.append((g('var'), g('code'))); return "<<|@|>>"
996 line2
= reg_act
.sub(repl
, line
)
997 params
= line2
.split('<<|@|>>')
1002 for x
in xrange(len(extr
)):
1003 params
[x
] = params
[x
].strip()
1005 app("lst.extend(%r)" % params
[x
].split())
1006 (var
, meth
) = extr
[x
]
1008 if meth
: app('lst.append(task.inputs%s)' % meth
)
1009 else: app("lst.extend([a.srcpath(env) for a in task.inputs])")
1011 if meth
: app('lst.append(task.outputs%s)' % meth
)
1012 else: app("lst.extend([a.bldpath(env) for a in task.outputs])")
1014 app('lst.extend(to_list(env[%r]))' % var
)
1015 if not var
in dvars
: dvars
.append(var
)
1018 app("lst.extend(%r)" % shlex
.split(params
[-1]))
1020 fun
= COMPILE_TEMPLATE_NOSHELL
% "\n\t".join(buf
)
1021 debug('action: %s', fun
)
1022 return (funex(fun
), dvars
)
1024 def compile_fun(name
, line
, shell
=None):
1025 "commands can be launched by the shell or not"
1026 if line
.find('<') > 0 or line
.find('>') > 0 or line
.find('&&') > 0:
1032 if sys
.platform
== 'win32':
1038 return compile_fun_shell(name
, line
)
1040 return compile_fun_noshell(name
, line
)
1042 def simple_task_type(name
, line
, color
='GREEN', vars=[], ext_in
=[], ext_out
=[], before
=[], after
=[], shell
=None):
1043 """return a new Task subclass with the function run compiled from the line given"""
1044 (fun
, dvars
) = compile_fun(name
, line
, shell
)
1046 return task_type_from_func(name
, fun
, vars or dvars
, color
, ext_in
, ext_out
, before
, after
)
1048 def task_type_from_func(name
, func
, vars=[], color
='GREEN', ext_in
=[], ext_out
=[], before
=[], after
=[]):
1049 """return a new Task subclass with the function run compiled from the line given"""
1055 'ext_in': Utils
.to_list(ext_in
),
1056 'ext_out': Utils
.to_list(ext_out
),
1057 'before': Utils
.to_list(before
),
1058 'after': Utils
.to_list(after
),
1061 cls
= type(Task
)(name
, (Task
,), params
)
1062 TaskBase
.classes
[name
] = cls
1065 def always_run(cls
):
1066 """Set all task instances of this class to be executed whenever a build is started
1067 The task signature is calculated, but the result of the comparation between
1068 task signatures is bypassed
1070 old
= cls
.runnable_status
1076 cls
.runnable_status
= always
1078 def update_outputs(cls
):
1079 """When a command is always run, it is possible that the output only change
1080 sometimes. By default the build node have as a hash the signature of the task
1081 which may not change. With this, the output nodes (produced) are hashed,
1082 and the hashes are set to the build nodes
1084 This may avoid unnecessary recompilations, but it uses more resources
1085 (hashing the output files) so it is not used by default
1087 old_post_run
= cls
.post_run
1090 bld
= self
.generator
.bld
1091 for output
in self
.outputs
:
1092 bld
.node_sigs
[self
.env
.variant()][output
.id] = Utils
.h_file(output
.abspath(self
.env
))
1093 bld
.task_sigs
[output
.id] = self
.unique_id()
1094 cls
.post_run
= post_run
1096 old_runnable_status
= cls
.runnable_status
1097 def runnable_status(self
):
1098 status
= old_runnable_status(self
)
1099 if status
!= RUN_ME
:
1102 uid
= self
.unique_id()
1104 bld
= self
.outputs
[0].__class
__.bld
1105 new_sig
= self
.signature()
1106 prev_sig
= bld
.task_sigs
[uid
][0]
1107 if prev_sig
== new_sig
:
1108 for x
in self
.outputs
:
1109 if not x
.id in bld
.node_sigs
[self
.env
.variant()]:
1111 if bld
.task_sigs
[x
.id] != uid
: # ensure the outputs are associated with *this* task
1119 cls
.runnable_status
= runnable_status
1121 def extract_outputs(tasks
):
1122 """file_deps: Infer additional dependencies from task input and output nodes
1127 (ins
, outs
) = v
[x
.env
.variant()]
1131 v
[x
.env
.variant()] = (ins
, outs
)
1133 for a
in getattr(x
, 'inputs', []):
1134 try: ins
[a
.id].append(x
)
1135 except KeyError: ins
[a
.id] = [x
]
1136 for a
in getattr(x
, 'outputs', []):
1137 try: outs
[a
.id].append(x
)
1138 except KeyError: outs
[a
.id] = [x
]
1140 for (ins
, outs
) in v
.values():
1141 links
= set(ins
.iterkeys()).intersection(outs
.iterkeys())
1147 def extract_deps(tasks
):
1148 """file_deps: Infer additional dependencies from task input and output nodes and from implicit dependencies
1149 returned by the scanners - that will only work if all tasks are created
1151 this is aimed at people who have pathological builds and who do not care enough
1152 to implement the build dependencies properly
1154 with two loops over the list of tasks, do not expect this to be really fast
1157 # first reuse the function above
1158 extract_outputs(tasks
)
1160 # map the output nodes to the tasks producing them
1166 except AttributeError:
1170 out_to_task
[(v
, node
.id)] = x
1172 # map the dependencies found to the tasks compiled
1177 except: # this is on purpose
1182 for k
in x
.generator
.bld
.node_deps
.get(x
.unique_id(), []):
1183 try: dep_to_task
[(v
, k
.id)].append(x
)
1184 except KeyError: dep_to_task
[(v
, k
.id)] = [x
]
1186 # now get the intersection
1187 deps
= set(dep_to_task
.keys()).intersection(set(out_to_task
.keys()))
1189 # and add the dependencies from task to task
1191 for k
in dep_to_task
[idx
]:
1192 k
.set_run_after(out_to_task
[idx
])
1194 # cleanup, remove the signatures
1197 delattr(x
, 'cache_sig')
1198 except AttributeError: