topgit: version 0.19.13
[topgit/pro.git] / awk / topgit_recurse.awk
blob2b30379c7b24228ca33a2ecd0739d02c057fd92a
1 #!/usr/bin/awk -f
3 # topgit_recurse - TopGit awk utility script used by tg--awksome
4 # Copyright (C) 2017,2019 Kyle J. McKay <mackyle@gmail.com>
5 # All rights reserved.
6 # License GPLv2
8 # topgit_recurse
10 # variable arguments (-v):
12 # brfile if non-empty, read TopGit (tg) branch names from here
13 # rmbr if true run system rm on brfile (after reading) if non-empty brfile
14 # anfile if non-empty, annihilated TopGit branch names are read from here
15 # rman if true run system rm on anfile (after reading) if non-empty anfile
16 # hdfile if non-empty, existing git branch (heads) names are read from here
17 # rmhd if true run system rm on hdfile (after reading) if non-empty hdfile
18 # cuthd if true extract and cut cuthd field of each "refs/heads/" line
19 # rtfile if non-empty, read non-annihilated remote tg branch names from here
20 # rmrt if true run system rm on rtfile (after reading) if non-empty rtfile
21 # usermt if non-empty output remote base lines using this prefix if in rtfile
22 # withan if true, include L == 2 output lines
23 # withbr if true, output a line for the top-level branch
24 # preord if true, output the branch line before its .topdeps instead of after
25 # exclbr whitespace separated list of names to exclude
26 # inclbr whitespace separated list of names to include
27 # startb whitespace separated list of start branch plus extra path items
28 # multib if true startb is list of multiple start nodes (with no extra path)
29 # filter if 1 or 2 output dependency instead of recurse lines (see below)
30 # once if > 0 nodes when 1st visited only if < 0 deps on 1st visit only
31 # leaves if true omit output lines where L != 1 (withbr recommended if set)
32 # tgonly if true only T != 0 (or M == 1) lines are output
33 # showlp if true output a :loop: line for any loops (even when filter != 0)
35 # NOTE: a non-empty startb value IS REQUIRED!
37 # in multi start mode (multib is true) duplicate start names are ignored
38 # unless multib is an integer > 1
40 # with !preord (default), a post-order tree traversal is done, with preord
41 # a pre-order tree traversal is done
43 # for inclbr and exclbr the "edge" referred to is the edge that would be
44 # output by filter=2 (even if filter is not set to that value) and in the
45 # case of withbr=1 the "top-level branch" line will be an edge to self
47 # if inclbr is non-empty only edges with at least one end listed in inclbr
48 # will appear on stdout
50 # if a branch name appears in exclbr any edge with either end listed in exclbr
51 # will be omitted from stdout trumping inclbr
53 # when withbr is true, the "top-level branch" line undergoes inclbr/exclbr
54 # processing too which can end up suppressing it from the output
56 # if a branch is listed in exclbr then all edges going to/from that branch
57 # are necessarily omitted which means effectively that any branch listed in
58 # exclbr has its .topdeps file treated as though it were empty
60 # except for branches removed entirely from consideration by exclbr/inclbr
61 # or !withan and being in anfile, any other branch found to be missing
62 # (i.e. no hdfile entry) will generate a missing (M=1) line regardless of
63 # any !withbr or leaves or tgonly settings
65 # annihilated branches listed in anfile may also appear in brfile without harm
66 # but they do not need to for correct results (i.e. same results either way)
68 # Note that if non-empty, usermt must be the FULL prefix for remote base ref
69 # names for example "refs/remotes/origin/top-bases" works (if that's the
70 # correct top-bases location of course)
72 # if a branch is excluded (either by not being in a non-empty inclbr list or
73 # by being listed in the exclbr list) then it will not be recursed into either
74 # (but the node itself can appear in the output if it's NOT in a non-empty
75 # inclbr list nor in the exclbr list and an inclbr branch has an edge to it)
77 # to get accurate output, brfile, anfile and hdfile must all be provided and,
78 # obviously, if remote information is needed rtfile as well (usermt will be
79 # effectively ignored unless rtfile is provided)
81 # input is a list of edges as output by run_awk_topgit_deps
83 # using the startb starting point the graph is walked outputting one line for
84 # each visited node with the following format:
86 # M T L V <node> [<parent> <branch> <chain> <names>]
88 # where M T L are single numeric digits with the following meanings:
90 # M=0 branch actually exists (i.e. it's NOT missing)
91 # M=1 branch does not exist but was named in a .topdeps or startb
93 # T=0 branch is NOT tgish or NOT local (missing and remotes are always 0)
94 # T=1 branch IS local tgish (annihilated branches are always 1)
95 # T=2 branch IS local tgish and has a non-annihilated remote tgish branch
97 # L=0 not a leaf node (missing and remotes are always 0)
98 # L=1 IS a leaf node (might or might NOT be tgish)
99 # L=2 an annihilated tgish branch (they can never be leaves anyway)
101 # contrary to the non-awk code this replaces, L == 1 IS possible with preord
103 # The V value is a non-negative integer indicating excess visits to this node
104 # where the first visit is not in excess so it's 0 the next visit is the first
105 # excess visit so it's 1 and so on.
107 # note that <node> will always be present and non-empty
108 # unless withbr is true then <parent> will also always be present and non-empty
109 # even if withbr IS true <parent> will be non-empty if extra path items were
110 # provided (the first path item becomes the parent and the rest the chain)
111 # the rest of the path items show the link chain from <node> up to <startb>
112 # with any extra path items output on the end
114 # An output line might look like this:
116 # 0 1 1 0 t/foo/leaf t/foo/int t/stage
118 # L=1 "a leaf" means any node that is either not a TopGit branch or is a
119 # non-annihilated TopGit branch with NO non-annihilated dependencies (that
120 # means NO non-tgish dependencies either)
122 # loops are detected and avoided (the link causing the loop is dropped) and
123 # if showlp is true a line like the following will be output whenever one is
124 # encountered (even when filter != 0):
126 # :loop: t/foo/int t/foo/leaf t/foo/int t/stage
128 # the two branch names immediately after LOOP show the link that was dropped
129 # to avoid the loop and the rest of the path is the normal branch chain
131 # in filter mode (filter is 1 or 2) the output is a list of deps (1) or
132 # edges (2) instead of recursion lines so the above sample output line would
133 # end up being this when filter mode is 1:
135 # t/foo/leaf
137 # to get the "patch series" list used for navigation use withbr=1 once=1 and
138 # filter=1
140 # and just this output when filter mode is 2:
142 # t/foo/int t/foo/leaf
144 # the first two node items from the recursion line are reversed (if withbr
145 # is active the single node name will be doubled to make an edge to itself)
146 # because the edge format is "<node-with-topdeps-line> <for-this-node>" whereas
147 # the normal recursion lines have the opposite order.
149 # When filter != 0, any missing and remote lines are always omitted from
150 # the output, but loop lines (if showlp=1) ARE output and are NOT truncated
151 # at all (i.e. as though filter=0 just for the loop line).
153 # extra items in startb are ignored in filter mode unless multib is "1" in
154 # which case they're then treated as additional starting nodes (just like
155 # normal multib=1 mode does).
157 # when filter mode is active the rtfile file and usermt settings are ignored
158 # (but rmrt will still work) while preord does work it's mostly pointless
159 # in filter mode. the leaves option still works exactly the same way as it's
160 # just the final format of the output line that's affected by filter mode not
161 # which lines (other than omitting remote lines) are output. Lines for
162 # missing (M == 1) items are, however, totally suppressed in filter mode
163 # since they're just not meaningful in that case. loop lines will still be
164 # output in exactly the same format if showlp is true.
166 # filter mode can be helpful when the intent is to ultimately run
167 # awk_topgit_navigate on a subset of the TopGit dependency tree
170 BEGIN { exitcode = "" }
171 function exitnow(e) { exitcode=e; exit e }
172 END { if (exitcode != "") exit exitcode }
174 BEGIN {
175 inited = 0
176 inconly = 0
177 cnt = split(inclbr, scratch, " ")
178 if (cnt) {
179 inconly = 1
180 for (i = 1; i <= cnt; ++i) incnames[scratch[i]] = 1
182 cnt = split(exclbr, scratch, " ")
183 for (i = 1; i <= cnt; ++i) excnames[scratch[i]] = 1
184 cnt = split(startb, scratch, " ")
185 if (!cnt) exitnow(2)
186 if (multib) {
187 multibonce = 1
188 if (multib ~ /^[1-9][0-9]*$/) {
189 if (0 + multib > 1) multibonce = 0
191 startcnt = 0
192 for (i = 1; i <= cnt; ++i) {
193 if (!multibonce || !seenstartbr[scratch[i]]) {
194 startbr[++startcnt] = scratch[i]
195 extrabr[startcnt] = ""
196 seenstartbr[scratch[i]] = 1
199 } else {
200 startbr[1] = scratch[1]
201 xtrapth = ""
202 for (i = 2; i <= cnt; ++i) xtrapth = xtrapth " " scratch[i]
203 extrabr[1] = xtrapth;
204 startcnt = 1
206 sub(/\/+$/, "", usermt)
207 if (usermt != "") usermt = usermt "/"
208 if (filter != "" && filter != 0 && filter != 1 && filter != 2) exitnow(2)
211 function quotevar(v) {
212 gsub(/\047/, "\047\\\047\047", v)
213 return "\047" v "\047"
216 function rmfiles() {
217 rmlist = ""
218 if (rmbr && brfile != "") rmlist = rmlist " " quotevar(brfile)
219 if (rman && anfile != "") rmlist = rmlist " " quotevar(anfile)
220 if (rmhd && hdfile != "") rmlist = rmlist " " quotevar(hdfile)
221 if (rmrt && rtfile != "") rmlist = rmlist " " quotevar(rtfile)
222 if (rmlist != "") {
223 system("rm -f" rmlist)
224 rmbr = 0
225 brfile = ""
226 rman = 0
227 anfile = ""
228 rmhd = 0
229 hdfile = ""
230 rmrt = 0
231 rtfile = ""
235 function init(abranch, _e) {
236 if (inited)
237 return
238 inited=1
239 if (brfile != "") {
240 while ((_e = (getline abranch <brfile)) > 0) {
241 if (abranch != "") tgish[abranch] = 1
243 close(brfile)
244 if (_e < 0) exitnow(2)
246 if (anfile != "") {
247 while ((_e = (getline abranch <anfile)) > 0) {
248 if (abranch != "") ann[abranch] = 1
250 close(anfile)
251 if (_e < 0) exitnow(2)
253 if (hdfile != "") {
254 if (cuthd) {
255 fno = 1
256 if (cuthd ~ /^[1-9][0-9]*$/) fno = 0 + cuthd
258 while ((_e = (getline abranch <hdfile)) > 0) {
259 if (fno) {
260 if (split(abranch, scratch, " ") < fno || length(scratch[fno]) < 12 ||
261 substr(scratch[fno], 1, 11) != "refs/heads/") continue
262 abranch = substr(scratch[fno], 12)
263 sub(/[~:^].*$/, "", abranch)
265 if (abranch != "") heads[abranch] = 1
267 close(hdfile)
268 if (_e < 0) exitnow(2)
270 if (rtfile != "") {
271 if (!filter) {
272 while ((_e = (getline abranch <rtfile)) > 0) {
273 if (abranch != "") tgishr[abranch] = 1
275 close(rtfile)
276 if (_e < 0) exitnow(2)
279 rmfiles()
282 END { init(); rmfiles(); }
284 function inclself(abranch) {
285 return !(abranch in excnames) &&
286 (!inconly || abranch in incnames)
289 function incledge(b1, b2) {
290 return !(b1 in excnames) && !(b2 in excnames) &&
291 (!inconly || b1 in incnames || b2 in incnames)
294 NR == 1 {init()}
296 NF == 2 && $1 != "" && $2 != "" && $1 != $2 &&
297 incledge($1, $2) && !($1 in ann) && (withan || !($2 in ann)) {
298 linkval = links[$1]
299 if (linkval != "") {
300 if (index(" " linkval " ", " " $2 " ")) next
301 links[$1] = linkval " " $2
302 } else {
303 links[$1] = $2
305 if (withan && !($2 in ann)) {
306 # when using withan, linksx is the tree !withan would generate
307 # (it eXcludes all annihilated links)
308 # no need for it if !withan in effect as it would match links
309 linkval = linksx[$1]
310 if (linkval != "") linksx[$1] = linkval " " $2
311 else linksx[$1] = $2
315 function xvisits(node) {
316 if (node in xvisitcnts)
317 xvisitcnts[node] = xvisitcnts[node] + 1
318 else
319 xvisitcnts[node] = 0
320 return xvisitcnts[node]
323 function walktree(node, trail, level,
324 oncenodes, istgish, isleaf, children, childcnt, parent, child, visited, i)
326 if (once > 0 && (node in oncenodes)) return
327 if (!(node in heads)) {
328 if (!filter) print "1 0 0 " xvisits(node) " " node trail
329 if (once) oncenodes[node] = 1
330 return
332 if (filter == 2) {
333 parent = substr(trail, 2)
334 sub(/ .*$/, "", parent)
335 if (parent == "") parent = node
336 parent = parent " "
338 istgish = 0
339 isleaf = 0
340 if (node in ann) {
341 istgish = 1
342 isleaf = 2
343 } else if (node in tgish) {
344 istgish = (node in tgishr) ? 2 : 1
346 if (isleaf != 2) isleaf = !istgish || (withan?linksx[node]:links[node]) == ""
347 if (preord && (level > 0 || (withbr && inclself(node))) && (!leaves || isleaf == 1) && (!tgonly || istgish)) {
348 if (filter) print parent node
349 else print "0 " istgish " " isleaf " " xvisits(node) " " node trail
351 if (!once || !(node in oncenodes)) {
352 if (istgish == 2 && usermt && !leaves && !tgonly)
353 print "0 0 0 " xvisits(usermt node) " " usermt node " " node trail
354 if ((childcnt = split(links[node], children, " "))) {
355 visited = " " node trail " "
356 for (i = 1; i <= childcnt; ++i) {
357 child = children[i]
358 if (index(visited, " " child " ")) {
359 if (showlp) print ":loop: " child " " node trail
360 continue
362 walktree(child, " " node trail, level + 1, oncenodes)
366 if (!preord && (level > 0 || (withbr && inclself(node))) && (!leaves || isleaf == 1) && (!tgonly || istgish)) {
367 if (filter) print parent node
368 else print "0 " istgish " " isleaf " " xvisits(node) " " node trail
370 if (once) oncenodes[node] = 1
373 END {
374 for (startidx = 1; startidx <= startcnt; ++startidx) {
375 astart = startbr[startidx]
376 if (!(astart in excnames) && (!(astart in heads) || withan || !(astart in ann)))
377 walktree(astart, extrabr[startidx], 0)