pattern_spatial_gen.sh: Change default SPATMIN 650 -> 4
[pachi/t.git] / tools / pattern_spatial_gen.sh
blob73eef4f8c6348d725d7a6c71774164d63d9b6ce8
1 #!/bin/sh
2 # pattern_spatial_gen: Initialize spatial dictionary from a SGF collection
4 # We will first gather all spatial features from the SGF collection
5 # (we take files as arguments) and store those occurring more than 4 times
6 # in a freshly created spatial dictionary; afterwards, you will probably want
7 # to do standard pattern-matching.
9 # DO NOT RUN THIS CONCURRENTLY! The dictionary will get corrupted.
11 # To get spatials in competition mode (also get information about unplayed
12 # spatials present at the board, not just the single played one per move),
13 # run this script as:
15 # PATARGS="competition" ./pattern_spatial_gen.sh ...
17 # Similarly, you can set SPATMIN to a number other than 4 to include
18 # spatial features with a different number of occurrences.
# Minimum-occurrence threshold handed to pachi as spat_threshold; an
# unset or empty SPATMIN falls back to 4.
: "${SPATMIN:=4}"

# Remove any existing dictionary file before generating a new one.
rm -f patterns.spat

echo " Gathering population of spatials occuring more than $SPATMIN times..."
# Convert every SGF file given on the command line with tools/sgf2gtp.pl
# and stream the combined output into a single pachi run. Each file name is
# echoed to stderr as it is processed. Expansions are quoted so that
# names containing whitespace or glob characters are handled as one word.
for sgf in "$@"; do
  printf '%s\n' "$sgf" >&2
  tools/sgf2gtp.pl <"$sgf"
done |
  ./pachi -d 0 -e patternscan \
    "gen_spat_dict,no_pattern_match,spat_threshold=$SPATMIN${PATARGS:+,$PATARGS}" \
    >/dev/null
# Rewrite patterns.spat in place: the leading number of each non-comment
# line is replaced by a running counter (1, 2, 3, ...). Lines starting
# with '#' are written back unchanged and do not advance the counter.
echo " Renumbering patterns.spat..."
perl -pi -e 's/^\d+/++$seq/e unless /^#/' patterns.spat
31 echo -n " Counting hash collisions... "
32 perl -lne 'chomp; my ($id, $d, $p, @h) = split(/ /, $_); foreach (@h) { next if $h{$_} = $id; print "collision $id - $h{$_} ($_)" if $h{$_}; $h{$_}=$id; }' patterns.spat | wc -l