// Copyright 2003 David Hilvert <dhilvert@auricle.dyndns.org>,
//                              <dhilvert@ugcs.caltech.edu>

/*  This file is part of the Anti-Lamenessing Engine.

    The Anti-Lamenessing Engine is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    The Anti-Lamenessing Engine is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with the Anti-Lamenessing Engine; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
 * d3/align.h: Handle alignment of view pyramids.
 *
 * XXX: this class assumes that the view volume is a right rectangular-based
 * pyramid, which holds for most photographic situations.
 *
 * XXX: this class assumes that the horizontal and vertical angles of the view
 * volume pyramid are proportional to the horizontal and vertical pixel
 * resolutions.
 */
#ifndef __d3align_h__
#define __d3align_h__

#include "point.h"
#include "et.h"
#include "pt.h"
#include "tfile.h"

class align {
	static ale_pos _init_angle;
	static et *alignment_array;
	static int _vp_adjust;
	static int _vo_adjust;
	static tload_t *tload;
	static tsave_t *tsave;
	/*
	 * Estimate the point at which the pyramidal axis passes through the
	 * plane, based on the given 2-dimensional transformation and the given
	 * point set indicating the corners of the quadrilateral of
	 * intersection.  (Both transformation and point set alone are
	 * sufficient for calculation.)
	 *
	 * Using the point set, the desired point is exactly the point of
	 * intersection between line segments drawn between opposite corners of
	 * the quadrilateral of intersection.  The algebraic details of this
	 * calculation are described below.
	 *
	 * If we represent the desired point by (e1, e2), and the four corners
	 * of the quadrilateral of intersection by (a1, a2), ..., (d1, d2) in
	 * clockwise (or counter-clockwise) order, then we get the two-equation
	 * vector system (implicitly four equations)
	 *
	 * (e1, e2) = (a1, a2) + x[(c1, c2) - (a1, a2)]
	 *          = (b1, b2) + y[(d1, d2) - (b1, b2)]
	 *
	 * Solving for x in terms of y, we get the two-equation system
	 *
	 * x = (b1 - yb1 + yd1 - a1) / (c1 - a1)
	 *   = (b2 - yb2 + yd2 - a2) / (c2 - a2)
	 *
	 * and
	 *
	 * y = (c1b2 - c1a2 - a1b2 - c2b1 + c2a1 - a2b1)
	 *   / (-c2b1 + c2d1 + a2b1 - a2d1 + c1b2 - c1d2 - a1b2 + a1d2)
	 *
	 * However, it's much easier just to project the center point of the
	 * source image onto the quadrilateral of intersection using the
	 * given 2-dimensional transformation.
	 */
	static d2::point axis_intersection(d2::point a, d2::point b, d2::point c, d2::point d, d2::transformation t) {
#if 0
		ale_pos y = (c[0]*b[1] - c[0]*a[1] - a[0]*b[1] - c[1]*b[0]
			   + c[1]*a[0] - a[1]*b[0])
			  / (-c[1]*b[0] + c[1]*d[0] + a[1]*b[0] - a[1]*d[0]
			   + c[0]*b[1] - c[0]*d[1] - a[0]*b[1] + a[0]*d[1]);

		return b + y * (d - b);
#else
		return t.transform_scaled(d2::point((t.scaled_height() - 1) / 2, (t.scaled_width() - 1) / 2));
#endif
	}
	/*
	 * gd_position is a gradient-descent automatic positioning algorithm
	 * used to determine the location of the view pyramid apex.
	 *
	 * Starting from an initial camera position ESTIMATE, this method
	 * determines the angles between each pair of legs in the view volume
	 * (there are six such pairs).  The root sum of squares of the
	 * differences between these angles and the known desired angles is
	 * used as an error metric, and a kind of gradient descent is used to
	 * find a position for which this metric is minimized.
	 */
	static point gd_position(point estimate, d2::point a, d2::point b, d2::point c, d2::point d, d2::transformation t) {
		ale_pos w = t.scaled_width();
		ale_pos h = t.scaled_height();

		/*
		 * The desired diagonal angle is given (as init_angle).
		 * Calculate the desired side angles as follows:
		 *
		 * The distance to the apex is
		 *
		 * D = sqrt(h*h + w*w) / (2 * tan(init_angle/2))
		 *
		 * Each pair of legs bounding a side of length h meets at the
		 * apex, which lies at distance sqrt(D*D + w*w/4) from that
		 * side's midpoint (and symmetrically for sides of length w),
		 * so
		 *
		 * desired_h_angle = 2 * arctan(h / (2 * sqrt(D*D + w*w/4)))
		 * desired_w_angle = 2 * arctan(w / (2 * sqrt(D*D + h*h/4)))
		 */

		ale_pos D = (ale_pos) sqrt(h * h + w * w)
			  / (ale_pos) (2 * tan(_init_angle / 2));
		ale_pos desired_h_angle = 2 * atan((double) h / (2 * sqrt((double) (D*D) + (double) (w*w) / 4)));
		ale_pos desired_w_angle = 2 * atan((double) w / (2 * sqrt((double) (D*D) + (double) (h*h) / 4)));
		ale_pos estimate_h1_angle = estimate.anglebetw(a, b);
		ale_pos estimate_h2_angle = estimate.anglebetw(c, d);
		ale_pos estimate_w1_angle = estimate.anglebetw(a, d);
		ale_pos estimate_w2_angle = estimate.anglebetw(b, c);
		ale_pos estimate_d1_angle = estimate.anglebetw(a, c);
		ale_pos estimate_d2_angle = estimate.anglebetw(b, d);

		ale_pos error = sqrt(pow(estimate_h1_angle - desired_h_angle, 2)
				   + pow(estimate_h2_angle - desired_h_angle, 2)
				   + pow(estimate_w1_angle - desired_w_angle, 2)
				   + pow(estimate_w2_angle - desired_w_angle, 2)
				   + pow(estimate_d1_angle - _init_angle, 2)
				   + pow(estimate_d2_angle - _init_angle, 2));

		/*
		 * Vary the magnitude by which each coordinate of the position
		 * can be changed at each step.
		 */

		ale_pos view_angle = _init_angle;

		for (ale_pos magnitude = estimate[2] / 2;
		     magnitude >= 1;
		     magnitude /= 2) {

			/*
			 * Continue searching at this magnitude while error <
			 * old_error.  (Initialize old_error accordingly.)
			 */

			ale_pos old_error = error * 2;
			while (old_error > error) {

				// ale_pos D = sqrt(h * h + w * w)
				//           / (2 * tan(view_angle/2));
				// ale_pos desired_h_angle = 2 * atan(h / (2 * sqrt(D*D + w*w/4)));
				// ale_pos desired_w_angle = 2 * atan(w / (2 * sqrt(D*D + h*h/4)));

				// fprintf(stderr, ".");

				// fprintf(stderr, "estimate: [%f %f %f %f %f %f]\n",
				// 		estimate_h1_angle,
				// 		estimate_h2_angle,
				// 		estimate_w1_angle,
				// 		estimate_w2_angle,
				// 		estimate_d1_angle,
				// 		estimate_d2_angle);

				// fprintf(stderr, "desired : [%f %f %f]\n",
				// 		desired_h_angle,
				// 		desired_w_angle,
				// 		view_angle);

				old_error = error;
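
				/*
				 * Perturb each position coordinate by -magnitude,
				 * 0, or +magnitude (c0, c1, c2), keeping any
				 * combination that reduces the error metric.  The
				 * c3 term corresponds to a view-angle perturbation
				 * that is currently disabled below.
				 */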
				for (ale_pos c0 = -magnitude; c0 <= magnitude; c0 += magnitude)
				for (ale_pos c1 = -magnitude; c1 <= magnitude; c1 += magnitude)
				for (ale_pos c2 = -magnitude; c2 <= magnitude; c2 += magnitude)
				for (ale_pos c3 = -magnitude; c3 <= magnitude; c3 += magnitude) {

					if (c3 > 10)
						c3 = 10;

					// fprintf(stderr, "[%f %f %f]\n", c0, c1, c2);

					estimate[0] += c0;
					estimate[1] += c1;
					estimate[2] += c2;
					// view_angle += c3 / 30;

					ale_pos D = (ale_pos) sqrt(h * h + w * w)
						  / (ale_pos) (2 * tan(view_angle / 2));
					ale_pos desired_h_angle = 2 * atan((double) h / (2 * sqrt((double) (D*D) + (double) (w*w) / 4)));
					ale_pos desired_w_angle = 2 * atan((double) w / (2 * sqrt((double) (D*D) + (double) (h*h) / 4)));
					estimate_h1_angle = estimate.anglebetw(a, b);
					estimate_h2_angle = estimate.anglebetw(c, d);
					estimate_w1_angle = estimate.anglebetw(a, d);
					estimate_w2_angle = estimate.anglebetw(b, c);
					estimate_d1_angle = estimate.anglebetw(a, c);
					estimate_d2_angle = estimate.anglebetw(b, d);

					ale_pos perturbed_error =
						sqrt(pow(estimate_h1_angle - desired_h_angle, 2)
						   + pow(estimate_h2_angle - desired_h_angle, 2)
						   + pow(estimate_w1_angle - desired_w_angle, 2)
						   + pow(estimate_w2_angle - desired_w_angle, 2)
						   + pow(estimate_d1_angle - view_angle, 2)
						   + pow(estimate_d2_angle - view_angle, 2));

					if (perturbed_error < error) {
						error = perturbed_error;
					} else {
						estimate[0] -= c0;
						estimate[1] -= c1;
						estimate[2] -= c2;
						// view_angle -= c3 / 30;
					}
				}

				// fprintf(stderr, "error %f\n", error);
			}
		}

		return estimate;
	}
public:

	/*
	 * Get alignment for frame N.
	 */
	static et of(unsigned int n) {
		assert (n < d2::image_rw::count());
		assert (alignment_array);

		return alignment_array[n];
	}

	static double angle_of(unsigned int n) {
		return _init_angle;
	}

	static pt projective(unsigned int n) {
		return pt(d2::align::of(n), of(n), angle_of(n));
	}
	static void set_tload(tload_t *t) {
		tload = t;
	}

	static void set_tsave(tsave_t *t) {
		tsave = t;
	}

	static void write_alignments() {
		int count = d2::image_rw::count();

		if (count > 0)
			tsave_first(tsave, projective(0));

		for (int i = 1; i < count; i++) {
			tsave_next(tsave, projective(i));
		}
	}
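
	/*
	 * A minimal usage sketch (hypothetical driver code; the names tload
	 * and tsave and the chosen angle are placeholders, and the actual
	 * call sites live elsewhere in ALE):
	 *
	 *	align::set_tload(tload);
	 *	align::set_tsave(tsave);
	 *	align::init_angle(M_PI / 4);
	 *	align::init_from_d2();
	 *
	 *	for (unsigned int n = 0; n < d2::image_rw::count(); n++) {
	 *		pt camera = align::projective(n);
	 *		// ... use the per-frame camera transformation ...
	 *	}
	 *
	 *	align::write_alignments();
	 */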
	static void vp_adjust() {
		_vp_adjust = 1;
	}

	static void vp_noadjust() {
		_vp_adjust = 0;
	}

	static void vo_adjust() {
		_vo_adjust = 1;
	}

	static void vo_noadjust() {
		_vo_adjust = 0;
	}

	/*
	 * Set the initial estimated diagonal viewing angle of the view
	 * pyramid.  This is the angle formed, e.g., between the top-left and
	 * bottom-right legs of the view pyramid.
	 */
	static void init_angle(ale_pos a) {
		_init_angle = a;
	}
	/*
	 * Initialize d3 transformations from d2 transformations.
	 *
	 * Here are three possible approaches for calculating camera position
	 * based on a known diagonal viewing angle and known area of
	 * intersection of the view volume with a fixed plane:
	 *
	 * (1) Divide the rectangular-based pyramidal view volume into two
	 * tetrahedra.  A coordinate system is selected so that the triangle of
	 * intersection between one of the tetrahedra and the fixed plane has
	 * coordinates (0, 0, 0), (1, 0, 0), and (a, b, 0), for some (a, b).
	 * The law of cosines is then used to derive three equations
	 * associating the three known angles at the apex with the lengths of
	 * the edges of the tetrahedron.  The solution of this system of
	 * equations gives the coordinates of the apex in terms of a, b, and
	 * the known angles.  These coordinates are then transformed back to
	 * the original coordinate system.
	 *
	 * (2) The gradient-descent approach taken by the method gd_position().
	 *
	 * (3) Assume that the camera is aimed (roughly) perpendicular to the
	 * plane.  In this case, the pyramidal axis forms, with each adjacent
	 * pyramidal edge and a segment in the plane, a right triangle.  The
	 * distance of the camera from the plane is d/(2 * tan(theta/2)), where
	 * d is the distance between opposite corners of the quadrilateral of
	 * intersection and theta is the angle between opposite pairs of edges
	 * of the view volume.  (A numeric illustration follows this comment.)
	 *
	 * As an initial approach to the problem, we use (3) followed by (2).
	 *
	 * After position is estimated, we determine orientation from position.
	 * In order to do this, we determine the point at which the axis of
	 * the view pyramid passes through the plane, as described in the
	 * method axis_intersection().
	 */
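
	/*
	 * For concreteness (illustrative numbers only, not drawn from any
	 * actual input): with a diagonal viewing angle of 45 degrees and
	 * diagonals of the quadrilateral of intersection averaging 1000
	 * pixels, method (3) places the camera at a distance of
	 * 1000 / (2 * tan(22.5 degrees)), or roughly 1207 pixels, from the
	 * plane.
	 */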
	static void init_from_d2() {
		assert (alignment_array == NULL);

		/*
		 * Initialize the alignment array.
		 */

		alignment_array = (et *) malloc(d2::image_rw::count() * sizeof(et));

		assert (alignment_array);

		if (!alignment_array) {
			fprintf(stderr, "\n\n*** Unable to allocate memory for 3D alignment array. ***\n\n");
			exit(1);
		}

		/*
		 * Iterate over input frames.
		 */

		for (unsigned int i = 0; i < d2::image_rw::count(); i++) {

			/*
			 * Obtain the four mapped corners of the quadrilateral
			 * of intersection.
			 */

			d2::transformation t = d2::align::of(i);

			d2::point a = t.transform_scaled(d2::point(0, 0));
			d2::point b = t.transform_scaled(d2::point(t.scaled_height() - 1, 0));
			d2::point c = t.transform_scaled(d2::point(t.scaled_height() - 1, t.scaled_width() - 1));
			d2::point d = t.transform_scaled(d2::point(0, t.scaled_width() - 1));

			/*
			 * Estimate the point at which the pyramidal axis
			 * passes through the plane.  We denote the point as
			 * 'e'.
			 */

			d2::point e = axis_intersection(a, b, c, d, t);
			/*
			 * Determine the average distance between opposite
			 * corners, and calculate the distance to the camera
			 * based on method (3) described above.
			 */

			ale_pos dist1 = sqrt((a[0] - c[0])
					   * (a[0] - c[0])
					   + (a[1] - c[1])
					   * (a[1] - c[1]));

			ale_pos dist2 = sqrt((b[0] - d[0])
					   * (b[0] - d[0])
					   + (b[1] - d[1])
					   * (b[1] - d[1]));

			ale_pos avg_dist = (dist1 + dist2) / 2;

			ale_pos tangent = tan(_init_angle / 2);

			ale_pos distance = avg_dist / (2 * tangent);

			/*
			 * Set the position of the camera based on the estimate
			 * from method (3).  This is used as the initial
			 * position for method (2).  We assume that the camera
			 * is placed so that its 0 and 1 coordinates match those
			 * of e.
			 */

			point estimate;

			estimate[0] = e[0];
			estimate[1] = e[1];
			estimate[2] = distance;

			// fprintf(stderr, "position (n=%d) %f %f %f\n", i, estimate[0], estimate[1], estimate[2]);

			/*
			 * Perform method (2).
			 */

			estimate = gd_position(estimate, a, b, c, d, t);

			// fprintf(stderr, ".");
			/*
			 * Assign transformation values based on the output of
			 * method (2), by modifying transformation parameters
			 * from the identity transformation.
			 */

			alignment_array[i] = et::identity();

			// fprintf(stderr, "position (n=%d) %f %f %f\n", i, estimate[0], estimate[1], estimate[2]);

			/*
			 * Modify translation values, if desired.
			 */

			if (_vp_adjust) {
				alignment_array[i].modify_translation(0, -estimate[0]);
				alignment_array[i].modify_translation(1, -estimate[1]);
				alignment_array[i].modify_translation(2, -estimate[2]);
			} else {

				/*
				 * Establish a default translation value.
				 */

				alignment_array[i].modify_translation(2, -1);
			}

			/*
			 * Load any transformations.
			 */

			if (i == 0) {
				int is_default;
				pt p = tload_first(tload, projective(i), &is_default);
				alignment_array[i] = p.e();
				_init_angle = p.view_angle();
			} else {
				int is_default;
				pt p = tload_next(tload, projective(i), &is_default);
				alignment_array[i] = p.e();

				if (p.view_angle() != _init_angle) {
					fprintf(stderr, "Error: variation in view angle is not currently supported.\n");
					exit(1);
				}
			}
			/*
			 * Assert that the z-axis translation is nonzero.  This
			 * is important for determining rotation values.
			 */

			assert (estimate[2] != 0);

			/*
			 * Check whether rotation adjustment must proceed.
			 */

			if (!_vo_adjust) {
				continue;
			}
			/*
			 * Modify rotation values.
			 *
			 * The axis of the view pyramid should be mapped onto
			 * the z-axis.  Hence, the point of intersection
			 * between the world-coordinates xy-plane and the axis
			 * of the pyramid should occur at (0, 0) in local x-y
			 * coordinates.
			 *
			 * If we temporarily assume no rotation about the
			 * z-axis (e2 == 0), then the Euler angles (e0, e1, e2)
			 * used to determine rotation give us:
			 *
			 * x' = x cos e1 - (z cos e0 - y sin e0) sin e1
			 * y' = y cos e0 + z sin e0
			 * z' = (z cos e0 - y sin e0) cos e1 + x sin e1
			 *
			 * Setting x' = y' = 0, we get:
			 *
			 * e0 = arctan(-y/z)
			 * e1 = arctan(x/(z cos e0 - y sin e0))
			 * [optionally add pi to achieve z' < 0]
			 *
			 * To set e2, taking T to be the 3D transformation as a
			 * function of e2, we first ensure that
			 *
			 * arctan(T(e2, a)[1] / T(e2, a)[0])
			 * ==
			 * arctan(width / height)
			 *
			 * by assigning
			 *
			 * e2 += arctan(width / height)
			 *     - arctan(T(e2, a)[1] / T(e2, a)[0])
			 *
			 * Once this condition is satisfied, we conditionally
			 * assign
			 *
			 * e2 += pi
			 *
			 * to ensure that
			 *
			 * T(e2, a)[0] < 0
			 * T(e2, a)[1] < 0
			 *
			 * which places the transformed point in the lower-left
			 * quadrant.  This is correct, since point A is mapped
			 * from (0, 0), the most negative point in the image.
			 */
			point e_translated = alignment_array[i](e);

			// fprintf(stderr, "axis-intersection (n=%d) e1 %f e0 %f e_t1 %f e_t0 %f e_t2 %f\n",
			// 		i,
			// 		e[1],
			// 		e[0],
			// 		alignment_array[i](e)[1],
			// 		alignment_array[i](e)[0],
			// 		alignment_array[i](e)[2]);

			// fprintf(stderr, "camera origin (n=%d) o1 %f o0 %f o2 %f o_t1 %f o_t0 %f o_t2 %f\n",
			// 		i,
			// 		estimate[1],
			// 		estimate[0],
			// 		estimate[2],
			// 		alignment_array[i](estimate)[1],
			// 		alignment_array[i](estimate)[0],
			// 		alignment_array[i](estimate)[2]);

			ale_pos e0 = atan(-e_translated[1] / e_translated[2]);
			ale_pos e1 = atan((double) e_translated[0]
					/ ((double) e_translated[2] * cos(e0)
					 - (double) e_translated[1] * sin(e0)));

			alignment_array[i].modify_rotation(0, e0);
			alignment_array[i].modify_rotation(1, e1);

			if (alignment_array[i](e)[2] > 0)
				alignment_array[i].modify_rotation(1, M_PI);

			// fprintf(stderr, "axis-intersection (n=%d) e1 %f e0 %f e_t1 %f e_t0 %f e_t2 %f\n",
			// 		i,
			// 		e[1],
			// 		e[0],
			// 		alignment_array[i](e)[1],
			// 		alignment_array[i](e)[0],
			// 		alignment_array[i](e)[2]);

			// fprintf(stderr, "camera origin (n=%d) o_t1 %f o_t0 %f o_t2 %f\n",
			// 		i,
			// 		alignment_array[i](estimate)[1],
			// 		alignment_array[i](estimate)[0],
			// 		alignment_array[i](estimate)[2]);
			point a_transformed = alignment_array[i](a);

			ale_pos e2 = atan((t.scaled_width() - 1) / (t.scaled_height() - 1))
				   - atan(a_transformed[1] / a_transformed[0]);

			// fprintf(stderr, "a components (n=%d) a1 %f a0 %f\n", i, a[1], a[0]);
			// fprintf(stderr, "e2 components (n=%d) tw %f th %f a_t1 %f a_t0 %f\n", i,
			// 		t.scaled_width(), t.scaled_height(), a_transformed[1], a_transformed[0]);

			alignment_array[i].modify_rotation(2, e2);

			// fprintf(stderr, "rotation (n=%d) e0 %f e1 %f e2 %f\n", i, e0, e1, e2);

			if (alignment_array[i](a)[0] > 0) {
				e2 += M_PI;
				alignment_array[i].modify_rotation(2, M_PI);

				// fprintf(stderr, "adding pi to e2.");
			}

			// fprintf(stderr, "rotation (n=%d) e0 %f e1 %f e2 %f\n", i, e0, e1, e2);

			// fprintf(stderr, "(0, 0) output (n=%d) a1 %f a0 %f a_t1 %f a_t0 %f\n",
			// 		i,
			// 		a[1],
			// 		a[0],
			// 		alignment_array[i](a)[1],
			// 		alignment_array[i](a)[0]);
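
			/*
			 * Verify that point a, which is mapped from (0, 0),
			 * lands in the lower-left quadrant, as required by the
			 * derivation of e2 above.
			 */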
			assert (alignment_array[i](a)[0] < 0);
			assert (alignment_array[i](a)[1] < 0);
		}
	}
	static void adjust_translation(unsigned int n, int d, ale_pos x) {
		alignment_array[n].modify_translation(d, x);
	}

	static void adjust_rotation(unsigned int n, int d, ale_pos x) {
		alignment_array[n].modify_rotation(d, x);
	}

	static void set_translation(unsigned int n, int d, ale_pos x) {
		alignment_array[n].set_translation(d, x);
	}

	static void set_rotation(unsigned int n, int d, ale_pos x) {
		alignment_array[n].set_rotation(d, x);
	}

	static void adjust_view_angle(ale_pos x) {
		_init_angle += x;
	}

	static void set_view_angle(ale_pos x) {
		_init_angle = x;
	}
};

#endif