From 8743ea5ca7def209250011032670d74a1c86ba16 Mon Sep 17 00:00:00 2001
From: Jean-loup Gailly <jloup@gailly.net>
Date: Thu, 21 Jul 2011 16:19:36 +0200
Subject: [PATCH] UCT: introduce val_scale_max

Give more weight to territory when winning big (maximize win). This reduces
the number of silly moves and makes the game more enjoyable for humans.
---
 uct/internal.h |  2 +-
 uct/uct.c      |  8 ++++++--
 uct/walk.c     | 39 ++++++++++++++++++++++++---------------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/uct/internal.h b/uct/internal.h
index 2aefd17..fdd041a 100644
--- a/uct/internal.h
+++ b/uct/internal.h
@@ -64,7 +64,7 @@ struct uct {
 	int dynkomi_interval;
 	struct uct_dynkomi *dynkomi;
 
-	floating_t val_scale;
+	floating_t val_scale, val_scale_max;
 	int val_points;
 	bool val_extra;
 
diff --git a/uct/uct.c b/uct/uct.c
index 9234e0b..b2010d1 100644
--- a/uct/uct.c
+++ b/uct/uct.c
@@ -558,7 +558,7 @@ uct_state_init(char *arg, struct board *b)
 	u->best2_ratio = 2.5;
 	u->max_maintime_ratio = 8.0;
 
-	u->val_scale = 0.04; u->val_points = 40;
+	u->val_scale = 0.1; u->val_scale_max = 0.2;
 	u->dynkomi_interval = 1000;
 	u->dynkomi_mask = S_BLACK | S_WHITE;
 
@@ -865,9 +865,13 @@ uct_state_init(char *arg, struct board *b)
 				/* How much of the game result value should be
 				 * influenced by win size. Zero means it isn't. */
 				u->val_scale = atof(optval);
+			} else if (!strcasecmp(optname, "val_scale_max") && optval) {
+				/* Value of the result scale when winning big
+				 * (root value >= sure_win_threshold) */
+				u->val_scale_max = atof(optval);
 			} else if (!strcasecmp(optname, "val_points") && optval) {
 				/* Maximum size of win to be scaled into game
-				 * result value. Zero means boardsize^2. */
+				 * result value. Zero means boardsize^2/10. */
 				u->val_points = atoi(optval) * 2; // result values are doubled
 			} else if (!strcasecmp(optname, "val_extra")) {
 				/* If false, the score coefficient will be simply
diff --git a/uct/walk.c b/uct/walk.c
index 7bc1e3a..2da7218 100644
--- a/uct/walk.c
+++ b/uct/walk.c
@@ -181,22 +181,31 @@ uct_leaf_node(struct uct *u, struct board *b, enum stone player_color,
 static floating_t
 scale_value(struct uct *u, struct board *b, int result)
 {
-	floating_t rval = result > 0 ? 1.0 : result < 0 ? 0.0 : 0.5;
-	if (u->val_scale && result != 0) {
-		int vp = u->val_points;
-		if (!vp) {
-			vp = board_size(b) - 1; vp *= vp; vp *= 2;
-		}
-
-		floating_t sval = (floating_t) abs(result) / vp;
-		sval = sval > 1 ? 1 : sval;
-		if (result < 0) sval = 1 - sval;
-		if (u->val_extra)
-			rval += u->val_scale * sval;
-		else
-			rval = (1 - u->val_scale) * rval + u->val_scale * sval;
-		// fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
+	if (result == 0) return 0.5;
+	floating_t rval = result > 0 ? 1.0 : 0.0;
+
+	floating_t scale = u->val_scale;
+	/* Give more weight to territory when winning big (maximize win). This reduces
+	 * the number of silly moves and makes the game more enjoyable for humans. */
+	if (u->t->root->u.playouts > GJ_MINGAMES &&
+	    tree_node_get_value(u->t, -1, u->t->root->u.value) >= u->sure_win_threshold) {
+		scale = u->val_scale_max;
 	}
+	if (scale == 0) return rval;
+
+	int vp = u->val_points;
+	/* By default do not try to win by more than 44 points on 19x19,
+	 * 12 points on 9x9. Remember that result here is twice the score. */
+	if (!vp) vp = board_size2(b) / 5;
+
+	floating_t sval = (floating_t) abs(result) / vp;
+	sval = sval > 1 ? 1 : sval;
+	if (result < 0) sval = 1 - sval;
+	if (u->val_extra)
+		rval += scale * sval;
+	else
+		rval = (1 - scale) * rval + scale * sval;
+	// fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
 	return rval;
 }
 
-- 
2.11.4.GIT