topgit_recurse.awk: include excess visit counts
[topgit/pro.git] / awk / topgit_recurse.awk
blobb7b5d6e4862680a68e785bd749706f8b42c8ef86
1 #!/usr/bin/awk -f
3 # topgit_recurse - TopGit awk utility script used by tg--awksome
4 # Copyright (C) 2017 Kyle J. McKay <mackyle@gmail.com>
5 # All rights reserved.
6 # License GPLv2
8 # topgit_recurse
10 # variable arguments (-v):
12 # brfile if non-empty, read TopGit branch names from here
13 # rmbr if true run system rm on brfile (after reading) if non-empty brfile
14 # anfile if non-empty, annihilated branch names are read from here
15 # rman if true run system rm on anfile (after reading) if non-empty anfile
16 # hdfile if non-empty, existing head names are read from here
17 # rmhd if true run system rm on hdfile (after reading) if non-empty hdfile
18 # cuthd if true extract and cut cuthd field of each "refs/heads/" line
19 # rtfile if non-empty, read non-annihilated remote tg branch names from here
20 # rmrt if true run system rm on rtfile (after reading) if non-empty rtfile
21 # usermt if non-empty output remote base lines using this prefix if in rtfile
22 # withan if true, include L == 2 output lines
23 # withbr if true, output a line for the top-level branch
24 # preord if true, output the branch line before its .topdeps instead of after
25 # exclbr whitespace separated list of names to exclude
26 # inclbr whitespace separated list of names to include
27 # startb whitespace separated list of start branch plus extra path items
28 # multib if "1" startb is list of multiple start nodes (with no extra path)
29 # filter if 1 or 2 output dependency instead of recurse lines (see below)
30 # once if > 0 nodes when 1st visited only if < 0 deps on 1st visit only
31 # leaves if true omit output lines where L != 1 (withbr recommended if set)
32 # tgonly if true only T != 0 (or M == 1) lines are output
33 # showlp if true output a :loop: line for any loops
35 # in multi start mode (multib is true) duplicate start names are ignored
36 # (using a true value for "multib" other than "1" may have undefined behavior)
38 # if inclbr is non-empty a branch name must be listed to appear on stdout
40 # if a branch name appears in exclbr it is omitted from stdout trumping inclbr
42 # except for branches removed entirely from consideration by exclbr/inclbr
43 # or !withan and being in anfile, any other branch found to be missing
44 # (i.e. no hdfile entry) will generate a missing (M=1) line regardless of
45 # any !withbr or leaves or tgonly settings
47 # annihilated branches listed in anfile may also appear in brfile without harm
48 # but they do not need to for correct results (i.e. same results either way)
50 # Note that if non-empty, usermt must be the FULL prefix for remote base ref
51 # names for example "refs/remotes/origin/top-bases" works (if that's the
52 # correct top-bases location of course)
54 # if a branch is excluded (either by not being in a non-empty inclbr list or
55 # by being listed in the exclbr list) then it will not be recursed into either
57 # to get accurate output, brfile, anfile and hdfile must all be provided and,
58 # obviously, if remote information is needed rtfile as well (usermt will be
59 # effectively ignored unless rtfile is provided)
61 # input is a list of edges as output by run_awk_topgit_deps
63 # using the startb starting point the graph is walked outputting one line for
64 # each visited node with the following format:
66 # M T L V <node> [<parent> <branch> <chain> <names>]
68 # where M T L are single numeric digits with the following meanings:
70 # M=0 branch actually exists (i.e. it's NOT missing)
71 # M=1 branch does not exist but was named in a .topdeps or startb (if withbr)
73 # T=0 branch is NOT tgish or NOT local (missing and remotes are always 0)
74 # T=1 branch IS local tgish (annihilated branches are always 1)
75 # T=2 branch IS local tgish and has a non-annihilated remote tgish branch
77 # L=0 not a leaf node (missing and remotes are always 0)
78 # L=1 IS a leaf node (might or might NOT be tgish)
79 # L=2 an annihilated tgish branch (they can never be leaves anyway)
81 # contrary to the non-awk code this replaces, L == 1 IS possible with preord
83 # The V value is a non-negative integer indicating excess visits to this node
84 # where the first visit is not in excess so it's 0 the next visit is the first
85 # excess visit so it's 1 and so on.
87 # note that <node> will always be present and non-empty
88 # unless withbr is true then <parent> will also always be present and non-empty
89 # even if withbr IS true <parent> will be non-empty if extra path items were
90 # provided (the first path item becomes the parent and the rest the chain)
91 # the rest of the path items show the link chain from <node> up to <startb>
92 # with any extra path items output on the end
94 # An output line might look like this:
96 # 0 1 1 0 t/foo/leaf t/foo/int t/stage
98 # L=2 "a leaf" means any node that is either not a TopGit branch or is a
99 # non-annihilated TopGit branch with NO non-annihilated dependencies (that
100 # means NO non-tgish dependencies either)
102 # loops are detected and avoided (the link causing the loop is dropped) and
103 # if showlp is true a line like the following will be output whenever one is
104 # encountered:
106 # :loop: t/foo/int t/foo/leaf t/foo/int t/stage
108 # the two branch names immediately after LOOP show the link that was dropped
109 # to avoid the loop and the rest of the path is the normal branch chain
111 # in filter mode (filter is 1 or 2) the output is a list of deps (1) or
112 # edges (2) instead of recursion lines so the above sample output line would
113 # end up being this when filter mode is 1:
115 # t/foo/leaf
117 # to get the "patch series" list used for navigation use withbr=1 once=1 and
118 # filter=1
120 # and just this output when filter mode is 2:
122 # t/foo/int t/foo/leaf
124 # the first two node items from the recursion line are reversed (if withbr
125 # is active the single node name will be doubled to make an edge to itself)
126 # because the edge format is "<node-with-topdeps-line> <for-this-node>" whereas
127 # the normal recursion lines have the opposite order.
129 # extra items in startb are ignored in filter mode unless multib is "1" in
130 # which case they're then treated as additional starting nodes (just like
131 # normal multib=1 mode does).
133 # when filter mode is active the rtfile file and usermt settings are ignored
134 # (but rmrt will still work) while preord does work it's mostly pointless
135 # in filter mode. the leaves option still works exactly the same way as it's
136 # just the final format of the output line that's affected by filter mode not
137 # which lines (other than omitting remote lines) are output. Lines for
138 # missing (M == 1) items are, however, totally suppressed in filter mode
139 # since they're just not meaningful in that case. loop lines will still be
140 # output in exactly the same format if showlp is true.
142 # filter mode can be helpful when the intent is to ultimately run
143 # awk_topgit_navigate on a subset of the TopGit dependency tree
146 BEGIN { exitcode = "" }
147 function exitnow(e) { exitcode=e; exit e }
148 END { if (exitcode != "") exit exitcode }
150 BEGIN {
151 inconly = 0
152 cnt = split(inclbr, scratch, " ")
153 if (cnt) {
154 inconly = 1
155 for (i = 1; i <= cnt; ++i) incnames[scratch[i]] = 1
157 cnt = split(exclbr, scratch, " ")
158 for (i = 1; i <= cnt; ++i) excnames[scratch[i]] = 1
159 cnt = split(startb, scratch, " ")
160 if (!cnt) exitnow(2)
161 if (multib) {
162 startcnt = 0
163 for (i = 1; i <= cnt; ++i) {
164 if (!seenstartbr[scratch[i]]) {
165 startbr[++startcnt] = scratch[i]
166 extrabr[startcnt] = ""
167 seenstartr[scratch[i]] = 1
170 } else {
171 startbr[1] = scratch[1]
172 xtrapth = ""
173 for (i = 2; i <= cnt; ++i) xtrapth = xtrapth " " scratch[i]
174 extrabr[1] = xtrapth;
175 startcnt = 1
177 sub(/\/+$/, "", usermt)
178 if (usermt != "") usermt = usermt "/"
179 if (filter != "" && filter != 0 && filter != 1 && filter != 2) exitnow(2)
182 function quotevar(v) {
183 gsub(/\047/, "\047\\\047\047", v)
184 return "\047" v "\047"
187 function init(abranch, _e) {
188 rmlist = ""
189 if (brfile != "") {
190 while ((_e = (getline abranch <brfile)) > 0) {
191 if (abranch != "") tgish[abranch] = 1
193 close(brfile)
194 if (_e < 0) exitnow(2)
195 if (rmbr) rmlist = rmlist " " quotevar(brfile)
197 if (anfile != "") {
198 while ((_e = (getline abranch <anfile)) > 0) {
199 if (abranch != "") ann[abranch] = 1
201 close(anfile)
202 if (_e < 0) exitnow(2)
203 if (rman) rmlist = rmlist " " quotevar(anfile)
205 if (hdfile != "") {
206 if (cuthd) {
207 fno = 1
208 if (cuthd ~ /^[1-9][0-9]*$/) fno = 0 + cuthd
210 while ((_e = (getline abranch <hdfile)) > 0) {
211 if (fno) {
212 if (split(abranch, scratch, " ") < fno || length(scratch[fno]) < 12 ||
213 substr(scratch[fno], 1, 11) != "refs/heads/") continue
214 abranch = substr(scratch[fno], 12)
215 sub(/[~:^].*$/, "", abranch)
217 if (abranch != "") heads[abranch] = 1
219 close(hdfile)
220 if (_e < 0) exitnow(2)
221 if (rmhd) rmlist = rmlist " " quotevar(hdfile)
223 if (rtfile != "") {
224 if (!filter) {
225 while ((_e = (getline abranch <rtfile)) > 0) {
226 if (abranch != "") tgishr[abranch] = 1
228 close(rtfile)
229 if (_e < 0) exitnow(2)
231 if (rmrt) rmlist = rmlist " " quotevar(rtfile)
233 if (rmlist != "") system("rm -f" rmlist)
236 function included(abranch) {
237 return (!inconly || incnames[abranch]) && !excnames[abranch]
240 NR == 1 {init()}
242 NF == 2 && $1 != "" && $2 != "" && $1 != $2 &&
243 included($1) && included($2) && !ann[$1] && (withan || !ann[$2]) {
244 linkval = links[$1]
245 if (linkval != "") {
246 if (index(" " linkval " ", " " $2 " ")) next
247 links[$1] = linkval " " $2
248 } else {
249 links[$1] = $2
251 if (withan && !ann[$2]) {
252 # when using withan, linksx is the tree !withan would generate
253 # (it eXcludes all annihilated links)
254 # no need for it if !withan in effect as it would match links
255 linkval = linksx[$1]
256 if (linkval != "") linksx[$1] = linkval " " $2
257 else linksx[$1] = $2
261 function xvisits(node) {
262 if (node in xvisitcnts)
263 xvisitcnts[node] = xvisitcnts[node] + 1
264 else
265 xvisitcnts[node] = 0
266 return xvisitcnts[node]
269 function walktree(node, trail, level,
270 oncenodes, istgish, isleaf, children, childcnt, parent, child, visited, i)
272 if (once > 0 && (node in oncenodes)) return
273 if (!heads[node]) {
274 if (!filter) print "1 0 0 " xvisits(node) " " node trail
275 if (once) oncenodes[node] = 1
276 return
278 if (filter == 2) {
279 parent = substr(trail, 2)
280 sub(/ .*$/, "", parent)
281 if (parent == "") parent = node
282 parent = parent " "
284 istgish = 0
285 isleaf = 0
286 if (ann[node]) {
287 istgish = 1
288 isleaf = 2
289 } else if (tgish[node]) {
290 istgish = tgishr[node] ? 2 : 1
292 if (isleaf != 2) isleaf = !istgish || (withan?linksx[node]:links[node]) == ""
293 if (preord && (level > 0 || withbr) && (!leaves || isleaf == 1) && (!tgonly || istgish)) {
294 if (filter) print parent node
295 else print "0 " istgish " " isleaf " " xvisits(node) " " node trail
297 if (!once || !(node in oncenodes)) {
298 if (istgish == 2 && usermt && !leaves && !tgonly)
299 print "0 0 0 " xvisits(usermt node) " " usermt node " " node trail
300 if ((childcnt = split(links[node], children, " "))) {
301 visited = " " node trail " "
302 for (i = 1; i <= childcnt; ++i) {
303 child = children[i]
304 if (index(visited, " " child " ")) {
305 if (showlp) print ":loop: " child " " node trail
306 continue
308 walktree(child, " " node trail, level + 1, oncenodes)
312 if (!preord && (level > 0 || withbr) && (!leaves || isleaf == 1) && (!tgonly || istgish)) {
313 if (filter) print parent node
314 else print "0 " istgish " " isleaf " " xvisits(node) " " node trail
316 if (once) oncenodes[node] = 1
319 END {
320 for (startidx = 1; startidx <= startcnt; ++startidx) {
321 astart = startbr[startidx]
322 if (included(astart) && (!heads[astart] || withan || !ann[astart]))
323 walktree(astart, extrabr[startidx], 0)