python/graph: tweak colour schemes for distance charts
[Samba.git] / python / samba / graph.py
blob385e377906b72601f558ba8301ac0b89bba016a5
1 # -*- coding: utf-8 -*-
2 # Graph topology utilities and dot file generation
4 # Copyright (C) Andrew Bartlett 2018.
6 # Written by Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import print_function
22 from __future__ import division
23 from samba import colour
24 import sys
25 from itertools import cycle, groupby
27 FONT_SIZE = 10
30 def reformat_graph_label(s):
31 """Break DNs over multiple lines, for better shaped and arguably more
32 readable nodes. We try to split after commas, and if necessary
33 after hyphens or failing that in arbitrary places."""
34 if len(s) < 12:
35 return s
37 s = s.replace(',', ',\n')
38 pieces = []
39 for p in s.split('\n'):
40 while len(p) > 20:
41 if '-' in p[2:20]:
42 q, p = p.split('-', 1)
43 else:
44 n = len(p) // 12
45 b = len(p) // n
46 q, p = p[:b], p[b:]
47 pieces.append(q + '-')
48 if p:
49 pieces.append(p)
51 return '\\n'.join(pieces)
54 def quote_graph_label(s, reformat=False):
55 """Escape a string as graphvis requires."""
56 # escaping inside quotes is simple in dot, because only " is escaped.
57 # there is no need to count backslashes in sequences like \\\\"
58 s = s.replace('"', '\"')
59 if reformat:
60 s = reformat_graph_label(s)
61 return "%s" % s
64 def shorten_vertex_names(edges, vertices, suffix=',...', aggressive=False):
65 """Replace the common suffix (in practice, the base DN) of a number of
66 vertices with a short string (default ",..."). If this seems
67 pointless because the replaced string is very short or the results
68 seem strange, the original vertices are retained.
70 :param edges: a sequence of vertex pairs to shorten
71 :param vertices: a sequence of vertices to shorten
72 :param suffix: the replacement string [",..."]
74 :return: tuple of (edges, vertices, replacement)
76 If no change is made, the returned edges and vertices will be the
77 original lists and replacement will be None.
79 If a change is made, replacement will be a tuple (new, original)
80 indicating the new suffix that replaces the old.
81 """
82 vlist = list(set(x[0] for x in edges) |
83 set(x[1] for x in edges) |
84 set(vertices))
86 if len(vlist) < 2:
87 return edges, vertices, None
89 # walk backwards along all the strings until we meet a character
90 # that is not shared by all.
91 i = -1
92 try:
93 while True:
94 c = set(x[i] for x in vlist)
95 if len(c) > 1:
96 break
97 i -= 1
98 except IndexError:
99 # We have indexed beyond the start of a string, which should
100 # only happen if one node is a strict suffix of all others.
101 return edges, vertices, None
103 # add one to get to the last unanimous character.
104 i += 1
106 # now, we actually really want to split on a comma. So we walk
107 # back to a comma.
108 x = vlist[0]
109 while i < len(x) and x[i] != ',':
110 i += 1
112 if i >= -len(suffix):
113 # there is nothing to gain here
114 return edges, vertices, None
116 edges2 = []
117 vertices2 = []
119 for a, b in edges:
120 edges2.append((a[:i] + suffix, b[:i] + suffix))
121 for a in vertices:
122 vertices2.append(a[:i] + suffix)
124 replacements = [(suffix, a[i:])]
126 if aggressive:
127 # Remove known common annoying strings
128 map = dict((v, v) for v in vertices2)
129 for v in vertices2:
130 if ',CN=Servers,' not in v:
131 break
132 else:
133 map = dict((k, v.replace(',CN=Servers,', ',**,'))
134 for k, v in map.items())
135 replacements.append(('**', 'CN=Servers'))
137 for v in vertices2:
138 if not v.startswith('CN=NTDS Settings,'):
139 break
140 else:
141 map = dict((k, v.replace('CN=NTDS Settings,', '*,'))
142 for k, v in map.items())
143 replacements.append(('*', 'CN=NTDS Settings'))
145 edges2 = [(map.get(a, a), map.get(b, b)) for a, b in edges2]
146 vertices2 = [map.get(a, a) for a in vertices2]
148 return edges2, vertices2, replacements
151 def compile_graph_key(key_items, nodes_above=[], elisions=None,
152 prefix='key_', width=2):
153 """Generate a dot file snippet that acts as a legend for a graph.
155 :param key_items: sequence of items (is_vertex, style, label)
156 :param nodes_above: list of vertices (pushes key into right position)
157 :param elision: tuple (short, full) indicating suffix replacement
158 :param prefix: string used to generate key node names ["key_"]
159 :param width: default width of node lines
161 Each item in key_items is a tuple of (is_vertex, style, label).
162 is_vertex is a boolean indicating whether the item is a vertex
163 (True) or edge (False). Style is a dot style string for the edge
164 or vertex. label is the text associated with the key item.
166 edge_lines = []
167 edge_names = []
168 vertex_lines = []
169 vertex_names = []
170 order_lines = []
171 for i, item in enumerate(key_items):
172 is_vertex, style, label = item
173 tag = '%s%d_' % (prefix, i)
174 label = quote_graph_label(label)
175 name = '%s_label' % tag
177 if is_vertex:
178 order_lines.append(name)
179 vertex_names.append(name)
180 vertex_lines.append('%s[label="%s"; %s]' %
181 (name, label, style))
182 else:
183 edge_names.append(name)
184 e1 = '%se1' % tag
185 e2 = '%se2' % tag
186 order_lines.append(name)
187 edge_lines.append('subgraph cluster_%s {' % tag)
188 edge_lines.append('%s[label=src; color="#000000"; group="%s_g"]' %
189 (e1, tag))
190 edge_lines.append('%s[label=dest; color="#000000"; group="%s_g"]' %
191 (e2, tag))
192 edge_lines.append('%s -> %s [constraint = false; %s]' % (e1, e2,
193 style))
194 edge_lines.append(('%s[shape=plaintext; style=solid; width=%f; '
195 'label="%s\\r"]') %
196 (name, width, label))
197 edge_lines.append('}')
199 elision_str = ''
200 if elisions:
201 for i, elision in enumerate(reversed(elisions)):
202 order_lines.append('elision%d' % i)
203 short, long = elision
204 if short[0] == ',' and long[0] == ',':
205 short = short[1:]
206 long = long[1:]
207 elision_str += ('\nelision%d[shape=plaintext; style=solid; '
208 'label="\“%s” means “%s\\r"]\n'
209 % ((i, short, long)))
211 above_lines = []
212 if order_lines:
213 for n in nodes_above:
214 above_lines.append('"%s" -> %s [style=invis]' %
215 (n, order_lines[0]))
217 s = ('subgraph cluster_key {\n'
218 'label="Key";\n'
219 'subgraph cluster_key_nodes {\n'
220 'label="";\n'
221 'color = "invis";\n'
222 '%s\n'
223 '}\n'
224 'subgraph cluster_key_edges {\n'
225 'label="";\n'
226 'color = "invis";\n'
227 '%s\n'
228 '{%s}\n'
229 '}\n'
230 '%s\n'
231 '}\n'
232 '%s\n'
233 '%s [style=invis; weight=9]'
234 '\n'
235 % (';\n'.join(vertex_lines),
236 '\n'.join(edge_lines),
237 ' '.join(edge_names),
238 elision_str,
239 ';\n'.join(above_lines),
240 ' -> '.join(order_lines),
243 return s
246 def dot_graph(vertices, edges,
247 directed=False,
248 title=None,
249 reformat_labels=True,
250 vertex_colors=None,
251 edge_colors=None,
252 edge_labels=None,
253 vertex_styles=None,
254 edge_styles=None,
255 graph_name=None,
256 shorten_names=False,
257 key_items=None,
258 vertex_clusters=None):
259 """Generate a Graphviz representation of a list of vertices and edges.
261 :param vertices: list of vertex names (optional).
262 :param edges: list of (vertex, vertex) pairs
263 :param directed: bool: whether the graph is directed
264 :param title: optional title for the graph
265 :param reformat_labels: whether to wrap long vertex labels
266 :param vertex_colors: if not None, a sequence of colours for the vertices
267 :param edge_colors: if not None, colours for the edges
268 :param edge_labels: if not None, labels for the edges
269 :param vertex_styles: if not None, DOT style strings for vertices
270 :param edge_styles: if not None, DOT style strings for edges
271 :param graph_name: if not None, name of graph
272 :param shorten_names: if True, remove common DN suffixes
273 :param key: (is_vertex, style, description) tuples
274 :param vertex_clusters: list of subgraph cluster names
276 Colour, style, and label lists must be the same length as the
277 corresponding list of edges or vertices (or None).
279 Colours can be HTML RGB strings ("#FF0000") or common names
280 ("red"), or some other formats you don't want to think about.
282 If `vertices` is None, only the vertices mentioned in the edges
283 are shown, and their appearance can be modified using the
284 vertex_colors and vertex_styles arguments. Vertices appearing in
285 the edges but not in the `vertices` list will be shown but their
286 styles can not be modified.
288 out = []
289 write = out.append
291 if vertices is None:
292 vertices = set(x[0] for x in edges) | set(x[1] for x in edges)
294 if shorten_names:
295 edges, vertices, elisions = shorten_vertex_names(edges, vertices)
296 else:
297 elisions = None
299 if graph_name is None:
300 graph_name = 'A_samba_tool_production'
302 if directed:
303 graph_type = 'digraph'
304 connector = '->'
305 else:
306 graph_type = 'graph'
307 connector = '--'
309 write('/* generated by samba */')
310 write('%s %s {' % (graph_type, graph_name))
311 if title is not None:
312 write('label="%s";' % (title,))
313 write('fontsize=%s;\n' % (FONT_SIZE))
314 write('node[fontname=Helvetica; fontsize=%s];\n' % (FONT_SIZE))
316 prev_cluster = None
317 cluster_n = 0
318 quoted_vertices = []
319 for i, v in enumerate(vertices):
320 v = quote_graph_label(v, reformat_labels)
321 quoted_vertices.append(v)
322 attrs = []
323 if vertex_clusters and vertex_clusters[i]:
324 cluster = vertex_clusters[i]
325 if cluster != prev_cluster:
326 if prev_cluster is not None:
327 write("}")
328 prev_cluster = cluster
329 n = quote_graph_label(cluster)
330 if cluster:
331 write('subgraph cluster_%d {' % cluster_n)
332 cluster_n += 1
333 write('style = "rounded,dotted";')
334 write('node [style="filled"; fillcolor=white];')
335 write('label = "%s";' % n)
337 if vertex_styles and vertex_styles[i]:
338 attrs.append(vertex_styles[i])
339 if vertex_colors and vertex_colors[i]:
340 attrs.append('color="%s"' % quote_graph_label(vertex_colors[i]))
341 if attrs:
342 write('"%s" [%s];' % (v, ', '.join(attrs)))
343 else:
344 write('"%s";' % (v,))
346 if prev_cluster:
347 write("}")
349 for i, edge in enumerate(edges):
350 a, b = edge
351 if a is None:
352 a = "Missing source value"
353 if b is None:
354 b = "Missing destination value"
356 a = quote_graph_label(a, reformat_labels)
357 b = quote_graph_label(b, reformat_labels)
359 attrs = []
360 if edge_labels:
361 label = quote_graph_label(edge_labels[i])
362 attrs.append('label="%s"' % label)
363 if edge_colors:
364 attrs.append('color="%s"' % quote_graph_label(edge_colors[i]))
365 if edge_styles:
366 attrs.append(edge_styles[i]) # no quoting
367 if attrs:
368 write('"%s" %s "%s" [%s];' % (a, connector, b, ', '.join(attrs)))
369 else:
370 write('"%s" %s "%s";' % (a, connector, b))
372 if key_items:
373 key = compile_graph_key(key_items, nodes_above=quoted_vertices,
374 elisions=elisions)
375 write(key)
377 write('}\n')
378 return '\n'.join(out)
381 COLOUR_SETS = {
382 'ansi': {
383 'alternate rows': (colour.DARK_WHITE, colour.BLACK),
384 'disconnected': colour.RED,
385 'connected': colour.GREEN,
386 'transitive': colour.DARK_YELLOW,
387 'header': colour.UNDERLINE,
388 'reset': colour.C_NORMAL,
390 'ansi-heatmap': {
391 'alternate rows': (colour.DARK_WHITE, colour.BLACK),
392 'disconnected': colour.REV_RED,
393 'connected': colour.REV_GREEN,
394 'transitive': colour.REV_DARK_YELLOW,
395 'header': colour.UNDERLINE,
396 'reset': colour.C_NORMAL,
398 'xterm-256color': {
399 'alternate rows': (colour.xterm_256_colour(39),
400 colour.xterm_256_colour(45)),
401 #'alternate rows': (colour.xterm_256_colour(246),
402 # colour.xterm_256_colour(247)),
403 'disconnected': colour.xterm_256_colour(124, bg=True),
404 'connected': colour.xterm_256_colour(112),
405 'transitive': colour.xterm_256_colour(214),
406 'transitive scale': (colour.xterm_256_colour(190),
407 colour.xterm_256_colour(184),
408 colour.xterm_256_colour(220),
409 colour.xterm_256_colour(214),
410 colour.xterm_256_colour(208),
412 'header': colour.UNDERLINE,
413 'reset': colour.C_NORMAL,
415 'xterm-256color-heatmap': {
416 'alternate rows': (colour.xterm_256_colour(171),
417 colour.xterm_256_colour(207)),
418 #'alternate rows': (colour.xterm_256_colour(246),
419 # colour.xterm_256_colour(247)),
420 'disconnected': colour.xterm_256_colour(124, bg=True),
421 'connected': colour.xterm_256_colour(112, bg=True),
422 'transitive': colour.xterm_256_colour(214, bg=True),
423 'transitive scale': (colour.xterm_256_colour(190, bg=True),
424 colour.xterm_256_colour(184, bg=True),
425 colour.xterm_256_colour(220, bg=True),
426 colour.xterm_256_colour(214, bg=True),
427 colour.xterm_256_colour(208, bg=True),
429 'header': colour.UNDERLINE,
430 'reset': colour.C_NORMAL,
432 None: {
433 'alternate rows': ('',),
434 'disconnected': '',
435 'connected': '',
436 'transitive': '',
437 'header': '',
438 'reset': '',
443 def find_transitive_distance(vertices, edges):
444 all_vertices = (set(vertices) |
445 set(e[0] for e in edges) |
446 set(e[1] for e in edges))
448 if all_vertices != set(vertices):
449 print("there are unknown vertices: %s" %
450 (all_vertices - set(vertices)),
451 file=sys.stderr)
453 # with n vertices, we are always less than n hops away from
454 # anywhere else.
455 inf = len(all_vertices)
456 distances = {}
457 for v in all_vertices:
458 distances[v] = {v: 0}
460 for src, dest in edges:
461 distances[src][dest] = distances[src].get(dest, 1)
463 # This algorithm (and implementation) seems very suboptimal.
464 # potentially O(n^4), though n is smallish.
465 for i in range(inf):
466 changed = False
467 new_distances = {}
468 for v, d in distances.items():
469 new_d = d.copy()
470 new_distances[v] = new_d
471 for dest, cost in d.items():
472 for leaf, cost2 in distances[dest].items():
473 new_cost = cost + cost2
474 old_cost = d.get(leaf, inf)
475 if new_cost < old_cost:
476 new_d[leaf] = new_cost
477 changed = True
479 distances = new_distances
480 if not changed:
481 break
483 # filter out unwanted vertices and infinite links
484 answer = {}
485 for v in vertices:
486 answer[v] = {}
487 for v2 in vertices:
488 a = distances[v].get(v2, inf)
489 if a < inf:
490 answer[v][v2] = a
492 return answer
495 def get_transitive_colourer(colours, n_vertices):
496 if 'transitive scale' in colours:
497 scale = colours['transitive scale']
498 m = len(scale)
499 n = 1 + int(n_vertices ** 0.5)
501 def f(link):
502 return scale[min(link * m // n, m - 1)]
504 else:
505 def f(link):
506 return colours['transitive']
508 return f
511 def distance_matrix(vertices, edges,
512 utf8=False,
513 colour=None,
514 shorten_names=False,
515 generate_key=False,
516 grouping_function=None,
517 row_comments=None):
518 lines = []
519 write = lines.append
521 if utf8:
522 vertical = '│'
523 horizontal = '─'
524 corner = '╭'
525 #diagonal = '╲'
526 diagonal = '·'
527 #missing = '🕱'
528 missing = '-'
529 right_arrow = '←'
530 else:
531 vertical, horizontal, corner, diagonal, missing = '|-,0-'
532 right_arrow = '<-'
534 colours = COLOUR_SETS[colour]
536 colour_cycle = cycle(colours.get('alternate rows', ('',)))
538 if vertices is None:
539 vertices = sorted(set(x[0] for x in edges) | set(x[1] for x in edges))
541 if grouping_function is not None:
542 # we sort and colour according to the grouping function
543 # which can be used to e.g. alternate colours by site.
544 vertices = sorted(vertices, key=grouping_function)
545 colour_list = []
546 for k, v in groupby(vertices, key=grouping_function):
547 c = next(colour_cycle)
548 colour_list.extend(c for x in v)
549 else:
550 colour_list = [next(colour_cycle) for v in vertices]
552 if shorten_names:
553 edges, vertices, replacements = shorten_vertex_names(edges,
554 vertices,
555 '+',
556 aggressive=True)
558 vlen = max(6, max(len(v) for v in vertices))
560 # first, the key for the columns
561 c_header = colours.get('header', '')
562 c_disconn = colours.get('disconnected', '')
563 c_conn = colours.get('connected', '')
564 c_reset = colours.get('reset', '')
566 colour_transitive = get_transitive_colourer(colours, len(vertices))
568 vspace = ' ' * vlen
569 verticals = ''
570 write("%*s %s %sdestination%s" % (vlen, '',
571 ' ' * len(vertices),
572 c_header,
573 c_reset))
574 for i, v in enumerate(vertices):
575 j = len(vertices) - i
576 c = colour_list[i]
577 if j == 1:
578 start = '%s%ssource%s' % (vspace[:-6], c_header, c_reset)
579 else:
580 start = vspace
581 write('%s %s%s%s%s%s %s%s' % (start,
582 verticals,
583 c_reset,
585 corner,
586 horizontal * j,
588 c_reset
590 verticals += c + vertical
592 connections = find_transitive_distance(vertices, edges)
594 for i, v in enumerate(vertices):
595 c = colour_list[i]
596 links = connections[v]
597 row = []
598 for v2 in vertices:
599 link = links.get(v2)
600 if link is None:
601 row.append('%s%s' % (c_disconn, missing))
602 continue
603 if link == 0:
604 row.append('%s%s%s%s' % (c_reset, c, diagonal, c_reset))
605 elif link == 1:
606 row.append('%s1%s' % (c_conn, c_reset))
607 else:
608 ct = colour_transitive(link)
609 if link > 9:
610 link = '+'
611 row.append('%s%s%s' % (ct, link, c_reset))
613 if row_comments is not None and row_comments[i]:
614 row.append('%s %s %s' % (c_reset, right_arrow, row_comments[i]))
616 write('%s%*s%s %s%s' % (c, vlen, v, c_reset,
617 ''.join(row), c_reset))
619 example_c = next(colour_cycle)
620 if shorten_names:
621 write('')
622 for substitute, original in reversed(replacements):
623 write("'%s%s%s' stands for '%s%s%s'" % (example_c,
624 substitute,
625 c_reset,
626 example_c,
627 original,
628 c_reset))
629 if generate_key:
630 write('')
631 write("Data can get from %ssource%s to %sdestination%s in the "
632 "indicated number of steps." % (c_header, c_reset,
633 c_header, c_reset))
634 write("%s%s%s means zero steps (it is the same DC)" %
635 (example_c, diagonal, c_reset))
636 write("%s1%s means a direct link" % (c_conn, c_reset))
637 write("%s2%s means a transitive link involving two steps "
638 "(i.e. one intermediate DC)" %
639 (colour_transitive(2), c_reset))
640 write("%s%s%s means there is no connection, even through other DCs" %
641 (c_disconn, missing, c_reset))
643 return '\n'.join(lines)