2 * Copyright (c) 2017 Mohamed Aslan <maslan@sce.carleton.ca>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include "clustering.h"
24 nearest_head(double x
, struct cluster
*clusters
, int n_clusters
)
28 double min_dist
= fabs(x
- clusters
[0].head
); /* FIXME */
33 for (i
= 1 ; i
< n_clusters
; i
++) {
34 if (fabs(x
- clusters
[i
].head
) < min_dist
) {
35 min_dist
= fabs(x
- clusters
[i
].head
);
45 kmeans_seq_init(struct kmeans_seq
*kseq
, size_t n
)
51 kseq
->clusters
= (struct cluster
*)reallocarray(NULL
, n
, sizeof(struct cluster
));
52 for (i
= 0 ; i
< n
; i
++) {
53 kseq
->clusters
[i
].head
= 0;
54 kseq
->clusters
[i
].val
= 0;
55 kseq
->clusters
[i
].n_points
= 0;
60 kmeans_seq_insert(struct kmeans_seq
*kseq
, double k
, double v
)
64 if (kseq
->n_points
< kseq
->n_clusters
) {
65 kseq
->clusters
[kseq
->n_points
].head
= k
;
66 kseq
->clusters
[kseq
->n_points
].val
= v
;
67 kseq
->clusters
[kseq
->n_points
++].n_points
++;
69 near
= nearest_head(k
, kseq
->clusters
, kseq
->n_clusters
);
71 kseq
->clusters
[near
].head
=
72 (kseq
->clusters
[near
].head
* kseq
->clusters
[near
].n_points
+ k
) / (kseq
->clusters
[near
].n_points
+ 1);
73 kseq
->clusters
[near
].val
=
74 (kseq
->clusters
[near
].val
* kseq
->clusters
[near
].n_points
+ v
) / (kseq
->clusters
[near
].n_points
+ 1);
75 kseq
->clusters
[near
].n_points
++;
80 kmeans_seq_find(struct kmeans_seq
*kseq
, double k
)
84 near
= nearest_head(k
, kseq
->clusters
, kseq
->n_clusters
);
85 return kseq
->clusters
[near
].val
;
89 kmeans_seq_print(struct kmeans_seq
*kseq
, const char *prefix
)
93 printf("[%s] kmeans_seq n_clusters: %zu\n", prefix
, kseq
->n_clusters
);
94 for (i
= 0 ; i
< kseq
->n_clusters
; i
++)
95 printf("[%s] C(%d): (%lf, %lf, %zu)\n", prefix
, i
, kseq
->clusters
[i
].head
, kseq
->clusters
[i
].val
, kseq
->clusters
[i
].n_points
);
99 kmeans_incr_init(struct kmeans_incr
*kincr
, double thresh
)
103 kincr
->threshold
= thresh
;
104 kincr
->n_clusters
= 0;
105 kincr
->clusters
= (struct cluster
*)reallocarray(NULL
, KMEANS_INCR_MAXTHRESH
, sizeof(struct cluster
));
106 for (i
= 0 ; i
< KMEANS_INCR_MAXTHRESH
; i
++) {
107 kincr
->clusters
[i
].head
= 0;
108 kincr
->clusters
[i
].val
= 0;
109 kincr
->clusters
[i
].n_points
= 0;
114 new_cluster(struct kmeans_incr
*kincr
, double k
, double v
)
116 kincr
->clusters
[kincr
->n_clusters
].head
= k
;
117 kincr
->clusters
[kincr
->n_clusters
].val
= v
;
118 kincr
->clusters
[kincr
->n_clusters
].n_points
= 1;
123 kmeans_incr_insert(struct kmeans_incr
*kincr
, double k
, double v
)
129 if (!(kincr
->n_clusters
)) {
130 new_cluster(kincr
, k
, v
);
133 near
= nearest_head(k
, kincr
->clusters
, kincr
->n_clusters
);
135 if (!(kincr
->clusters
[near
].head
)) {
136 new_cluster(kincr
, k
, v
);
139 dist
= fabs(kincr
->clusters
[near
].head
- k
) / kincr
->clusters
[near
].head
;
140 /* dist < threshold or max clusters is reached? */
141 if ((dist
< kincr
->threshold
) || (kincr
->n_clusters
== KMEANS_INCR_MAXTHRESH
)) {
143 kincr
->clusters
[near
].head
=
144 (kincr
->clusters
[near
].head
* kincr
->clusters
[near
].n_points
+ k
) / (kincr
->clusters
[near
].n_points
+ 1);
145 kincr
->clusters
[near
].val
=
146 (kincr
->clusters
[near
].val
* kincr
->clusters
[near
].n_points
+ v
) / (kincr
->clusters
[near
].n_points
+ 1);
147 kincr
->clusters
[near
].n_points
++;
150 new_cluster(kincr
, k
, v
);
154 kmeans_incr_find(struct kmeans_incr
*kincr
, double k
)
158 near
= nearest_head(k
, kincr
->clusters
, kincr
->n_clusters
);
159 return kincr
->clusters
[near
].val
;
163 kmeans_incr_print(struct kmeans_incr
*kincr
, const char *prefix
)
167 printf("[%s] kmeans_seq n_clusters: %zu\n", prefix
, kincr
->n_clusters
);
168 printf("[%s] kmeans_incr threshold: %lf\n", prefix
, kincr
->threshold
);
169 for (i
= 0 ; i
< kincr
->n_clusters
; i
++)
170 printf("[%s] C(%d): (%lf, %lf, %zu)\n", prefix
, i
, kincr
->clusters
[i
].head
, kincr
->clusters
[i
].val
, kincr
->clusters
[i
].n_points
);