pattern_match_spatial(): Use the incrementally matched hashes
[pachi/json.git] / pattern_mm.sh
blob2af5ecd93959e83d8841eae4756a4024bfe406a8
#!/bin/sh
# pattern_mm - Harvest patterns from game collection and compute feature strengths
#
# Usage: ./pattern_mm.sh GAME.sgf...
#
# This is a "frontend" for the MM tool by Remi Coulom, that is assumed to be
# unpacked and compiled in the mm/ subdirectory - get it at
#
#	http://remi.coulom.free.fr/Amsterdam2007/mm.tar.bz2
#
# It will scan a given SGF collection, collect patterns, and use the MM tool
# to compute the relative strength of various features. The output will be
#
#	* patterns.gamma: Gamma values of pattern features
#	* patterns.gammaf: Gamma values of pattern features for MC simulations
#
# If you haven't done so yet, you should first run ./pattern_spatial_gen.sh
# (probably in the competition scan mode) to initialize the spatial patterns
# dictionary for the collection.
#
# If you run this on hundreds of games, be sure you are doing it on local
# filesystem, with some free memory (and few GB of free disk on both
# local fs and in $TMPDIR, /tmp by default, for temporary data), and armed
# by a lot of patience - it can take long time (minutes, tens of minutes...).
#
# Internal environment variables (set by the script when it re-invokes itself):
#	mm_file  output file of a single pass; when empty, both passes are run
#	mm_par   suffix appended to the "competition" patternscan argument

# Top-level invocation: run one pass per output file, then stop. The exit
# status is that of the second pass.
if [ -z "$mm_file" ]; then
	mm_file=patterns.gamma mm_par= ./pattern_mm.sh "$@"
	mm_file=patterns.gammaf mm_par=,matchfast ./pattern_mm.sh "$@"
	exit
fi

# Private scratch directory instead of fixed names in /tmp, so that concurrent
# runs cannot clobber each other and nothing is left behind on interruption.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/pattern_mm.XXXXXX") || exit 1
cleanup() { rm -rf -- "$tmpdir"; }
trap cleanup EXIT
# Turn signals into a normal exit so that the EXIT trap runs in every sh.
trap 'exit 1' HUP INT TERM

enum_file=$tmpdir/patterns.enum
mm_out=$tmpdir/patterns.mm

echo "Gathering patterns..."
for i in "$@"; do ./sgf2gtp.pl "$i"; done |
	./zzgo -e patternscan "competition$mm_par" |
	sed -ne 's/^= //p' | grep -v '^$' |
	./pattern_enumerate.pl >"$enum_file"
ls -l "$enum_file"

# There must not be pipeline here, because of aux patterns.fdict file!

echo "Invoking MM..."
./pattern_mminput.pl <"$enum_file" | mm/mm >"$mm_out"

echo "Associating gamma values..."
# Strip leading blanks from the MM output and feed the result to join on
# stdin ("-"). Naming the MM output file as a join operand instead would make
# join ignore the pipe and silently bypass the sed stage.
sed 's/ */ /; s/^ //;' <"$mm_out" |
	join -o 2.3,1.2 - patterns.fdict |
	sed 's/^s\.[0-9]*:/s:/' >"$mm_file"

# The scratch files themselves are removed by the EXIT trap.
echo "Product:"
ls -l "$mm_file"
echo "Leaving behind for analysis:"
ls -l patterns.fdict mm-with-freq.dat