UCT book -> tbook (tree book), added short explanation to HACKING
[pachi/derm.git] / pattern_spatial_gen.sh
blobcd5d5d2cae72da35db9ea91f6bf64ccc231785d4
1 #!/bin/sh
2 # pattern_spatial_gen: Initialize spatial dictionary from a SGF collection
4 # We will first gather all spatial features from the SGF collection
5 # (we take files as arguments) and store these occuring more than 650 times
6 # in a freshly created spatial dictionary; afterwards, you will probably want
7 # to do standard pattern-matching.
9 # DO NOT RUN THIS CONCURRENTLY! The dictionary will get corrupted.
11 # To get spatials in competion mode (also get information about unplayed
12 # spatials present at the board, not just the single played one per move),
13 # run this script as:
15 # PATARGS="competition" ./pattern_spatial_gen.sh ...
17 # Similarly, you can set SPATMIN to different number than 650 to include
18 # spatial features with lower number of occurences.
20 [ -n "$SPATMIN" ] || SPATMIN=650
22 rm -f patterns.spat
24 echo " Gathering population of spatials occuring more than $SPATMIN times..."
25 (for i in "$@"; do echo $i >&2; ./sgf2gtp.pl $i; done) |
26 ./zzgo -d 0 -e patternscan gen_spat_dict,no_pattern_match,spat_threshold=$SPATMIN${PATARGS:+,$PATARGS} >/dev/null
28 echo " Renumbering patterns.spat..."
29 perl -i -pe '/^#/ and next; s/^\d+/++$a/e' patterns.spat
31 echo -n " Counting hash collisions... "
32 perl -lne 'chomp; my ($id, $d, $p, @h) = split(/ /, $_); foreach (@h) { next if $h{$_} = $id; print "collision $id - $h{$_} ($_)" if $h{$_}; $h{$_}=$id; }' patterns.spat | wc -l