convert kmeans_incr threshold
[actl.git] / clustering.c
blob495f41ddba87bdba78b46bba32a491d626d6c197
1 /*
2 * Copyright (c) 2017 Mohamed Aslan <maslan@sce.carleton.ca>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <math.h>
20 #include <stddef.h>
21 #include "clustering.h"
23 static int
24 nearest_head(double x, struct cluster *clusters, int n_clusters)
26 int i;
27 int min_idx = 0;
28 double min_dist = fabs(x - clusters[0].head); /* FIXME */
30 if (n_clusters == 1)
31 return 0;
33 for (i = 1 ; i < n_clusters ; i++) {
34 if (fabs(x - clusters[i].head) < min_dist) {
35 min_dist = fabs(x - clusters[i].head);
36 min_idx = i;
40 return min_idx;
44 void
45 kmeans_seq_init(struct kmeans_seq *kseq, size_t n)
47 int i;
49 kseq->n_points = 0;
50 kseq->n_clusters = n;
51 kseq->clusters = (struct cluster *)reallocarray(NULL, n, sizeof(struct cluster));
52 for (i = 0 ; i < n ; i++) {
53 kseq->clusters[i].head = 0;
54 kseq->clusters[i].val = 0;
55 kseq->clusters[i].n_points = 0;
59 void
60 kmeans_seq_insert(struct kmeans_seq *kseq, double k, double v)
62 int near;
64 if (kseq->n_points < kseq->n_clusters) {
65 kseq->clusters[kseq->n_points].head = k;
66 kseq->clusters[kseq->n_points].val = v;
67 kseq->clusters[kseq->n_points++].n_points++;
68 } else {
69 near = nearest_head(k, kseq->clusters, kseq->n_clusters);
70 /* XXX: lock */
71 kseq->clusters[near].head =
72 (kseq->clusters[near].head * kseq->clusters[near].n_points + k) / (kseq->clusters[near].n_points + 1);
73 kseq->clusters[near].val =
74 (kseq->clusters[near].val * kseq->clusters[near].n_points + v) / (kseq->clusters[near].n_points + 1);
75 kseq->clusters[near].n_points++;
79 double
80 kmeans_seq_find(struct kmeans_seq *kseq, double k)
82 int near;
84 near = nearest_head(k, kseq->clusters, kseq->n_clusters);
85 return kseq->clusters[near].val;
88 void
89 kmeans_seq_print(struct kmeans_seq *kseq, const char *prefix)
91 int i;
93 printf("[%s] kmeans_seq n_clusters: %zu\n", prefix, kseq->n_clusters);
94 for (i = 0 ; i < kseq->n_clusters ; i++)
95 printf("[%s] C(%d): (%lf, %lf, %zu)\n", prefix, i, kseq->clusters[i].head, kseq->clusters[i].val, kseq->clusters[i].n_points);
98 void
99 kmeans_incr_init(struct kmeans_incr *kincr, double thresh)
101 int i;
103 kincr->threshold = thresh;
104 kincr->n_clusters = 0;
105 kincr->clusters = (struct cluster *)reallocarray(NULL, KMEANS_INCR_MAXTHRESH, sizeof(struct cluster));
106 for (i = 0 ; i < KMEANS_INCR_MAXTHRESH ; i++) {
107 kincr->clusters[i].head = 0;
108 kincr->clusters[i].val = 0;
109 kincr->clusters[i].n_points = 0;
113 static void
114 new_cluster(struct kmeans_incr *kincr, double k, double v)
116 kincr->clusters[kincr->n_clusters].head = k;
117 kincr->clusters[kincr->n_clusters].val = v;
118 kincr->clusters[kincr->n_clusters].n_points = 1;
119 kincr->n_clusters++;
122 void
123 kmeans_incr_insert(struct kmeans_incr *kincr, double k, double v)
125 int near;
126 double dist;
128 /* first point? */
129 if (!(kincr->n_clusters)) {
130 new_cluster(kincr, k, v);
131 return;
133 near = nearest_head(k, kincr->clusters, kincr->n_clusters);
134 /* head is zero? */
135 if (!(kincr->clusters[near].head)) {
136 new_cluster(kincr, k, v);
137 return;
139 dist = fabs(kincr->clusters[near].head - k) / kincr->clusters[near].head;
140 /* dist < threshold or max clusters is reached? */
141 if ((dist < kincr->threshold) || (kincr->n_clusters == KMEANS_INCR_MAXTHRESH)) {
142 /* XXX: lock */
143 kincr->clusters[near].head =
144 (kincr->clusters[near].head * kincr->clusters[near].n_points + k) / (kincr->clusters[near].n_points + 1);
145 kincr->clusters[near].val =
146 (kincr->clusters[near].val * kincr->clusters[near].n_points + v) / (kincr->clusters[near].n_points + 1);
147 kincr->clusters[near].n_points++;
148 return;
150 new_cluster(kincr, k, v);
153 double
154 kmeans_incr_find(struct kmeans_incr *kincr, double k)
156 int near;
158 near = nearest_head(k, kincr->clusters, kincr->n_clusters);
159 return kincr->clusters[near].val;
162 void
163 kmeans_incr_print(struct kmeans_incr *kincr, const char *prefix)
165 int i;
167 printf("[%s] kmeans_seq n_clusters: %zu\n", prefix, kincr->n_clusters);
168 printf("[%s] kmeans_incr threshold: %lf\n", prefix, kincr->threshold);
169 for (i = 0 ; i < kincr->n_clusters ; i++)
170 printf("[%s] C(%d): (%lf, %lf, %zu)\n", prefix, i, kincr->clusters[i].head, kincr->clusters[i].val, kincr->clusters[i].n_points);