#3145: refactored infix formatter to improve output (less parenthesis) and added...
[heuristiclab.git] / HeuristicLab.Tests / HeuristicLab.Problems.DataAnalysis.Symbolic-3.4 / SymbolicExpressionTreeBottomUpSimilarityCalculatorTest.cs
bloba42d3955190da50aa00476524fa82eba2c1a883d
1 using System;
2 using System.Diagnostics;
3 using HeuristicLab.Random;
4 using Microsoft.VisualStudio.TestTools.UnitTesting;
6 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
7 [TestClass]
8 public class BottomUpSimilarityCalculatorTest {
9 private readonly InfixExpressionParser parser = new InfixExpressionParser();
11 private const int N = 200;
12 private const int Rows = 1;
13 private const int Columns = 10;
15 [TestMethod]
16 [TestCategory("Problems.DataAnalysis.Symbolic")]
17 [TestProperty("Time", "short")]
18 public void BottomUpTreeSimilarityCalculatorTestMapping() {
19 TestMatchedNodes("1 + 1", "2 + 2", 0, strict: true);
20 TestMatchedNodes("1 + 1", "2 + 2", 3, strict: false);
21 TestMatchedNodes("1 + 1", "1 + 2", 1, strict: true);
22 TestMatchedNodes("1 + 2", "2 + 1", 3, strict: true);
24 TestMatchedNodes("1 - 1", "2 - 2", 0, strict: true);
25 TestMatchedNodes("1 - 1", "2 - 2", 3, strict: false);
27 TestMatchedNodes("2 - 1", "1 - 2", 2, strict: true);
28 TestMatchedNodes("2 - 1", "1 - 2", 3, strict: false);
30 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 + X3 * X4", 7, strict: true);
31 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 + X3 * X4", 7, strict: false);
33 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 + X5 * X6", 3, strict: true);
34 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 + X5 * X6", 3, strict: false);
36 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 - X5 * X6", 3, strict: true);
37 TestMatchedNodes("X1 * X2 + X3 * X4", "X1 * X2 - X5 * X6", 3, strict: false);
39 TestMatchedNodes("SIN(SIN(SIN(X1)))", "SIN(SIN(SIN(X1)))", 4, strict: true);
40 TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(SIN(SIN(X1)))", 3, strict: true);
41 TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(SIN(X1)))", 2, strict: true);
42 TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(COS(X1)))", 1, strict: true);
44 const string lhs = "0.006153 + X9 * X7 * X2 * 0.229506 + X6 * X10 * X3 * 0.924598 + X2 * X1 * 0.951272 + X4 * X3 * 0.992570 + X6 * X5 * 1.027299";
45 const string rhs = "0.006153 + X10 * X7 * X2 * 0.229506 + X6 * X10 * X3 * 0.924598 + X2 * X1 * 0.951272 + X4 * X3 * 0.992570 + X6 * X5 * 1.027299";
47 TestMatchedNodes(lhs, lhs, 24, strict: true);
48 TestMatchedNodes(lhs, lhs, 24, strict: false);
50 TestMatchedNodes(lhs, rhs, 21, strict: true);
51 TestMatchedNodes(lhs, rhs, 21, strict: false);
54 private void TestMatchedNodes(string expr1, string expr2, int expected, bool strict) {
55 var t1 = parser.Parse(expr1);
56 var t2 = parser.Parse(expr2);
58 var map = SymbolicExpressionTreeBottomUpSimilarityCalculator.ComputeBottomUpMapping(t1, t2, strict);
59 Console.WriteLine($"Count: {map.Count}");
61 if (map.Count != expected) {
62 throw new Exception($"Match count {map.Count} is different than expected value {expected} for expressions:\n{expr1} and {expr2} (strict = {strict})\n");
66 [TestMethod]
67 [TestCategory("Problems.DataAnalysis.Symbolic")]
68 [TestProperty("Time", "long")]
69 public void BottomUpTreeSimilarityCalculatorTestPerformance() {
70 var grammar = new TypeCoherentExpressionGrammar();
71 grammar.ConfigureAsDefaultRegressionGrammar();
72 var twister = new MersenneTwister(31415);
73 var ds = Util.CreateRandomDataset(twister, Rows, Columns);
74 var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 100);
76 double s = 0;
77 var sw = new Stopwatch();
79 var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchVariableWeights = false, MatchNumericValues = false };
81 sw.Start();
82 for (int i = 0; i < trees.Length - 1; ++i) {
83 for (int j = i + 1; j < trees.Length; ++j) {
84 s += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
88 sw.Stop();
89 Console.WriteLine("Elapsed time: " + sw.ElapsedMilliseconds / 1000.0 + ", Avg. similarity: " + s / (N * (N - 1) / 2));
90 Console.WriteLine(N * (N + 1) / (2 * sw.ElapsedMilliseconds / 1000.0) + " similarity calculations per second.");
93 [TestMethod]
94 [TestCategory("Problems.DataAnalysis.Symbolic")]
95 [TestProperty("Time", "long")]
96 public void BottomUpTreeSimilarityCalculatorStrictMatchingConsistency() {
97 TestMatchingConsistency(strict: true);
100 [TestMethod]
101 [TestCategory("Problems.DataAnalysis.Symbolic")]
102 [TestProperty("Time", "long")]
103 public void BottomUpTreeSimilarityCalculatorRelaxedMatchingConsistency() {
104 TestMatchingConsistency(strict: false);
107 private static void TestMatchingConsistency(bool strict = false) {
108 var grammar = new TypeCoherentExpressionGrammar();
109 grammar.ConfigureAsDefaultRegressionGrammar();
110 var twister = new MersenneTwister(31415);
111 var ds = Util.CreateRandomDataset(twister, Rows, Columns);
112 var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 100);
114 var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchNumericValues = strict, MatchVariableWeights = strict };
115 var bottomUpSimilarity = 0d;
116 for (int i = 0; i < trees.Length - 1; ++i) {
117 for (int j = i + 1; j < trees.Length; ++j) {
118 bottomUpSimilarity += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
121 bottomUpSimilarity /= N * (N - 1) / 2;
123 var hashBasedSimilarity = SymbolicExpressionTreeHash.ComputeAverageSimilarity(trees, false, strict);
125 Assert.AreEqual(bottomUpSimilarity, hashBasedSimilarity, 1e-6);
127 Console.WriteLine($"Bottom-up similarity: {bottomUpSimilarity}, hash-based similarity: {hashBasedSimilarity}");