ex_rna16s: Add some procaryota RNA 16S samples
[autophylo.git] / matrix_to_tree.sh
blob09af10e3ed6b88763b12af30e851f921d40ff999
1 #!/bin/sh
2 # Normalize the matrix (on stdin), removing the diagonal and merging
3 # complementary cells and producing a minimum spanning tree with virtual
4 # nodes per component join (on stdout).
6 cat |
7 egrep -v '^[^ ]* (.) \1$' | # weed out edges to self
8 perl -nle 'BEGIN { our @w; }
9 chomp; my ($w, $a, $b) = split / /;
10 if (not defined $w[$b][$a]) { $w[$a][$b] = $w; next; }
11 $w[$b][$a] = ($w[$b][$a] + $w) / 2;
12 END { for my $a (0..$#w) {
13 for my $b ($a+1..$#w) {
14 print "$w[$a][$b] $a $b"
16 } }' | # merge A_ij and A_ji
17 sort -f | # lowest weights first (they have highest priority)
18 perl -nle 'BEGIN { our @c = 0..1000; our @v; my $vv = 0; }
19 chomp; my ($w, $a, $b) = split / /;
20 $c[$a] != $c[$b] or next;
21 # Each graph component is either a singleton, or
22 # has "virtual head", merging two sub-components;
23 # virtual heads have negative id, assigned sequentially.
24 my $va = $e[$c[$a]] || $a; # virtual nodes
25 my $vb = $e[$c[$b]] || $b;
26 #print "\n$a($c[$a]):$va -- $b($c[$b]):$vb";
27 my $d = $c[$b];
28 for (0..$#c) { $c[$_] == $d and $c[$_] = $c[$a]; }
29 my $vc = --$vv;
30 $e[$c[$a]] = $vc;
31 $w /= 2;
32 print "$w $va $vc";
33 print "$w $vb $vc";
34 ' # remove reflexive edges and extract minimum spanning tree