usr.sbin/dntpd/client.c

   1 /*
   2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * $DragonFly: src/usr.sbin/dntpd/client.c,v 1.13 2007/06/26 02:40:20 dillon Exp $
  35  */
  36
  37 #include "defs.h"
  38
  39 static int client_insane(struct server_info **, int, server_info_t);
  40
  41 void
  42 client_init(void)
  43 {
  44 }
  45
  46 int
  47 client_main(struct server_info **info_ary, int count)
  48 {
  49     struct server_info *best_off;
  50     struct server_info *best_freq;
  51     double last_freq;
  52     double freq;
  53     double offset;
  54     int calc_offset_correction;
  55     int didreconnect;
  56     int i;
  57     int insane;
  58
  59     last_freq = 0.0;
  60
  61     for (;;) {
  62         /*
  63          * Subtract the interval from poll_sleep and poll the client
  64          * if it reaches 0.
  65          *
  66          * Because we do not compensate for offset corrections which are
  67          * in progress, we cannot accumulate data for an offset correction
  68          * while a prior correction is still being worked through by the
  69          * system.
  70          */
  71         calc_offset_correction = !sysntp_offset_correction_is_running();
  72         for (i = 0; i < count; ++i)
  73             client_poll(info_ary[i], min_sleep_opt, calc_offset_correction);
  74
  75         /*
  76          * Find the best client (or synthesize one).  A different client
  77          * can be chosen for frequency and offset.  Note in particular
  78          * that offset counters and averaging code gets reset when an
  79          * offset correction is made (otherwise the averaging history will
  80          * cause later corrections to overshoot).
  81          *
  82          * The regression used to calculate the frequency is a much
  83          * longer-term entity and is NOT reset, so it is still possible
  84          * for the offset correction code to make minor adjustments to
  85          * the frequency if it so desires.
  86          *
  87          * client_check may replace the server_info pointer with a new
  88          * one.
  89          */
  90         best_off = NULL;
  91         best_freq = NULL;
  92         for (i = 0; i < count; ++i)
  93             client_check(&info_ary[i], &best_off, &best_freq);
  94
  95         /*
  96          * Check for server insanity.  In large NNTP pools some servers
  97          * may just be dead wrong, but report that they are right.
  98          */
  99         if (best_off) {
 100             insane = client_insane(info_ary, count, best_off);
 101             if (insane > 0) {
 102                 /*
 103                  * best_off meets the quorum requirements and is good
 104                  * (keep best_off)
 105                  */
 106                 best_off->server_insane = 0;
 107             } else if (insane == 0) {
 108                 /*
 109                  * best_off is probably good, but we do not have enough
 110                  * servers reporting yet to meet the quorum requirements.
 111                  */
 112                 best_off = NULL;
 113             } else {
 114                 /*
 115                  * best_off is ugly, mark the server as being insane for
 116                  * 60 minutes.
 117                  */
 118                 best_off->server_insane = 60 * 60;
 119                 logdebuginfo(best_off, 1,
 120                              "excessive offset deviation, mapping out\n");
 121                 best_off = NULL;
 122             }
 123         }
 124
 125         /*
 126          * Offset correction.
 127          */
 128         if (best_off) {
 129             offset = best_off->lin_sumoffset / best_off->lin_countoffset;
 130             lin_resetalloffsets(info_ary, count);
 131             if (offset < -COURSE_OFFSET_CORRECTION_LIMIT ||
 132                 offset > COURSE_OFFSET_CORRECTION_LIMIT ||
 133                 quickset_opt
 134             ) {
 135                 freq = sysntp_correct_course_offset(offset);
 136                 quickset_opt = 0;
 137             } else {
 138                 freq = sysntp_correct_offset(offset);
 139             }
 140         } else {
 141             freq = 0.0;
 142         }
 143
 144         /*
 145          * Frequency correction (throw away minor freq adjusts from the
 146          * offset code if we can't do a frequency correction here).  Do
 147          * not reissue if it hasn't changed from the last issued correction.
 148          */
 149         if (best_freq) {
 150             freq += best_freq->lin_cache_freq;
 151             if (last_freq != freq) {
 152                 sysntp_correct_freq(freq);
 153                 last_freq = freq;
 154             }
 155         }
 156
 157         /*
 158          * This function is responsible for managing the polling mode and
 159          * figures out how long we should sleep.
 160          */
 161         didreconnect = 0;
 162         for (i = 0; i < count; ++i)
 163             client_manage_polling_mode(info_ary[i], &didreconnect);
 164         if (didreconnect)
 165             client_check_duplicate_ips(info_ary, count);
 166
 167         /*
 168          * Polling loop sleep.
 169          */
 170         usleep(min_sleep_opt * 1000000 + random() % 500000);
 171     }
 172 }
 173
 174 void
 175 client_poll(server_info_t info, int poll_interval, int calc_offset_correction)
 176 {
 177     struct timeval rtv;
 178     struct timeval ltv;
 179     struct timeval lbtv;
 180     double offset;
 181
 182     /*
 183      * Adjust the insane-server countdown
 184      */
 185     if (info->server_insane > poll_interval)
 186         info->server_insane -= poll_interval;
 187     else
 188         info->server_insane = 0;
 189
 190     /*
 191      * By default we always poll.  If the polling interval comes under
 192      * active management the poll_sleep will be non-zero.
 193      */
 194     if (info->poll_sleep > poll_interval) {
 195         info->poll_sleep -= poll_interval;
 196         return;
 197     }
 198     info->poll_sleep = 0;
 199
 200     /*
 201      * If the client isn't open don't mess with the poll_failed count
 202      * or anything else.  We are left in the init or startup phase.
 203      */
 204     if (info->fd < 0) {
 205         if (info->poll_failed < 0x7FFFFFFF)
 206             ++info->poll_failed;
 207         return;
 208     }
 209
 210     logdebuginfo(info, 4, "poll, ");
 211     if (udp_ntptimereq(info->fd, &rtv, &ltv, &lbtv) < 0) {
 212         ++info->poll_failed;
 213         logdebug(4, "no response (%d failures in a row)\n", info->poll_failed);
 214         if (info->poll_failed == POLL_FAIL_RESET) {
 215             if (info->lin_count != 0) {
 216                 logdebuginfo(info, 4, "resetting regression due to failures\n");
 217             }
 218             lin_reset(info);
 219         }
 220         return;
 221     }
 222
 223     /*
 224      * Successful query.  Update polling info for the polling mode manager.
 225      */
 226     ++info->poll_count;
 227     info->poll_failed = 0;
 228
 229     /*
 230      * Figure out the offset (the difference between the reported
 231      * time and our current time) for linear regression purposes.
 232      */
 233     offset = tv_delta_double(&rtv, &ltv);
 234
 235     while (info) {
 236         /*
 237          * Linear regression
 238          */
 239         if (debug_level >= 4) {
 240             struct tm *tp;
 241             char buf[64];
 242             time_t t;
 243
 244             t = rtv.tv_sec;
 245             tp = localtime(&t);
 246             strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp);
 247             logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000);
 248         }
 249         lin_regress(info, &ltv, &lbtv, offset, calc_offset_correction);
 250         info = info->altinfo;
 251         if (info && debug_level >= 4) {
 252             logdebug(4, "%*.*s: poll, ",
 253                 (int)strlen(info->target),
 254                 (int)strlen(info->target), "(alt)");
 255         }
 256     }
 257 }
 258
 259 /*
 260  * Find the best client (or synthesize a fake info structure to return).
 261  * We can find separate best clients for offset and frequency.
 262  */
 263 void
 264 client_check(struct server_info **checkp,
 265              struct server_info **best_off,
 266              struct server_info **best_freq)
 267 {
 268     struct server_info *check = *checkp;
 269     struct server_info *info;
 270
 271     /*
 272      * Start an alternate linear regression once our current one
 273      * has passed a certain point.
 274      */
 275     if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) {
 276         info = malloc(sizeof(*info));
 277         assert(info != NULL);
 278         /* note: check->altinfo is NULL as of the bcopy */
 279         bcopy(check, info, sizeof(*info));
 280         check->altinfo = info;
 281         lin_reset(info);
 282     }
 283
 284     /*
 285      * Replace our current linear regression with the alternate once
 286      * the current one has hit its limit (beyond a certain point the
 287      * linear regression starts to work against us, preventing us from
 288      * reacting to changing conditions).
 289      *
 290      * Report any significant change in the offset or ppm.
 291      */
 292     if (check->lin_count >= LIN_RESTART) {
 293         if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) {
 294             double freq_diff;
 295
 296             freq_diff = info->lin_cache_freq - check->lin_cache_freq;
 297             logdebuginfo(info, 4, "Switching to alternate, Frequence "
 298                          "difference is %6.3f ppm\n",
 299                          freq_diff * 1.0E+6);
 300             *checkp = info;
 301             free(check);
 302             check = info;
 303         }
 304     }
 305
 306     /*
 307      * BEST CLIENT FOR FREQUENCY CORRECTION:
 308      *
 309      *  8 samples and a correlation > 0.99, or
 310      * 16 samples and a correlation > 0.96
 311      */
 312     info = *best_freq;
 313     if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) ||
 314         (check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96)
 315     ) {
 316         if (info == NULL ||
 317             fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr)
 318         ) {
 319             info = check;
 320             *best_freq = info;
 321         }
 322
 323     }
 324
 325     /*
 326      * BEST CLIENT FOR OFFSET CORRECTION:
 327      *
 328      * Use the standard-deviation and require at least 4 samples.  An
 329      * offset correction is valid if the standard deviation is less then
 330      * the average offset divided by 4.
 331      *
 332      * Servers marked as being insane are not allowed
 333      */
 334     info = *best_off;
 335     if (check->lin_countoffset >= 4 &&
 336         (check->lin_cache_stddev <
 337          fabs(check->lin_sumoffset / check->lin_countoffset / 4)) &&
 338         check->server_insane == 0
 339      ) {
 340         if (info == NULL ||
 341             fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev)
 342         ) {
 343             info = check;
 344             *best_off = info;
 345         }
 346     }
 347 }
 348
 349 /*
 350  * Actively manage the polling interval.  Note that the poll_* fields are
 351  * always transfered to the alternate regression when the check code replaces
 352  * the current regression with a new one.
 353  *
 354  * This routine is called from the main loop for each base info structure.
 355  * The polling mode applies to all alternates so we do not have to iterate
 356  * through the alt's.
 357  */
 358 void
 359 client_manage_polling_mode(struct server_info *info, int *didreconnect)
 360 {
 361     /*
 362      * Permanently failed servers are ignored.
 363      */
 364     if (info->server_state == -2)
 365         return;
 366
 367     /*
 368      * Our polling interval has not yet passed.
 369      */
 370     if (info->poll_sleep)
 371         return;
 372
 373     /*
 374      * Standard polling mode progression
 375      */
 376     switch(info->poll_mode) {
 377     case POLL_FIXED:
 378         /*
 379          * Initial state after connect or when a reconnect is required.
 380          */
 381         if (info->fd < 0) {
 382             logdebuginfo(info, 2, "polling mode INIT, relookup & reconnect\n");
 383             reconnect_server(info);
 384             *didreconnect = 1;
 385             if (info->fd < 0) {
 386                 if (info->poll_failed >= POLL_RECOVERY_RESTART * 5)
 387                     info->poll_sleep = max_sleep_opt;
 388                 else if (info->poll_failed >= POLL_RECOVERY_RESTART)
 389                     info->poll_sleep = nom_sleep_opt;
 390                 else
 391                     info->poll_sleep = min_sleep_opt;
 392                 break;
 393             }
 394
 395             /*
 396              * Transition the server to the DNS lookup successful state.
 397              * Note that the server state does not transition out of
 398              * lookup successful if we relookup after a packet failure
 399              * so the message is printed only once, usually.
 400              */
 401             client_setserverstate(info, 0, "DNS lookup success");
 402
 403             /*
 404              * If we've failed many times switch to the startup state but
 405              * do not fall through into it.  break the switch and a single
 406              * poll will be made after the nominal polling interval.
 407              */
 408             if (info->poll_failed >= POLL_RECOVERY_RESTART * 5) {
 409                 logdebuginfo(info, 2, "polling mode INIT->STARTUP (very slow)\n");
 410                 info->poll_mode = POLL_STARTUP;
 411                 info->poll_sleep = max_sleep_opt;
 412                 info->poll_count = 0;
 413                 break;
 414             } else if (info->poll_failed >= POLL_RECOVERY_RESTART) {
 415                 logdebuginfo(info, 2, "polling mode INIT->STARTUP (slow)\n");
 416                 info->poll_mode = POLL_STARTUP;
 417                 info->poll_count = 0;
 418                 break;
 419             }
 420         }
 421
 422         /*
 423          * Fall through to the startup state.
 424          */
 425         info->poll_mode = POLL_STARTUP;
 426         logdebuginfo(info, 2, "polling mode INIT->STARTUP (normal)\n");
 427         /* fall through */
 428     case POLL_STARTUP:
 429         /*
 430          * Transition to a FAILED state if too many poll failures occured.
 431          */
 432         if (info->poll_failed >= POLL_FAIL_RESET) {
 433             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 434             info->poll_mode = POLL_FAILED;
 435             info->poll_count = 0;
 436             break;
 437         }
 438
 439         /*
 440          * Transition the server to operational.  Do a number of minimum
 441          * interval polls to try to get a good offset calculation quickly.
 442          */
 443         if (info->poll_count)
 444             client_setserverstate(info, 1, "connected ok");
 445         if (info->poll_count < POLL_STARTUP_MAX) {
 446             info->poll_sleep = min_sleep_opt;
 447             break;
 448         }
 449
 450         /*
 451          * Once we've got our polls fall through to aquisition mode to
 452          * do aquisition processing.
 453          */
 454         info->poll_mode = POLL_ACQUIRE;
 455         info->poll_count = 0;
 456         logdebuginfo(info, 2, "polling mode STARTUP->ACQUIRE\n");
 457         /* fall through */
 458     case POLL_ACQUIRE:
 459         /*
 460          * Transition to a FAILED state if too many poll failures occured.
 461          */
 462         if (info->poll_failed >= POLL_FAIL_RESET) {
 463             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 464             info->poll_mode = POLL_FAILED;
 465             info->poll_count = 0;
 466             break;
 467         }
 468
 469         /*
 470          * Acquisition mode using the nominal timeout.  We do not shift
 471          * to maintainance mode unless the correlation is at least 0.90
 472          */
 473         if (info->poll_count < POLL_ACQUIRE_MAX ||
 474             info->lin_count < 8 ||
 475             fabs(info->lin_cache_corr) < 0.85
 476         ) {
 477             if (info->poll_count >= POLL_ACQUIRE_MAX &&
 478                 info->lin_count == LIN_RESTART - 2
 479             ) {
 480                 logdebuginfo(info, 2,
 481                     "WARNING: Unable to shift this source to "
 482                     "maintenance mode.  Target correlation is aweful\n");
 483             }
 484             break;
 485         }
 486         info->poll_mode = POLL_MAINTAIN;
 487         info->poll_count = 0;
 488         logdebuginfo(info, 2, "polling mode ACQUIRE->MAINTAIN\n");
 489         /* fall through */
 490     case POLL_MAINTAIN:
 491         /*
 492          * Transition to a FAILED state if too many poll failures occured.
 493          */
 494         if (info->poll_failed >= POLL_FAIL_RESET) {
 495             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 496             info->poll_mode = POLL_FAILED;
 497             info->poll_count = 0;
 498             break;
 499         }
 500
 501         /*
 502          * Maintaince mode, max polling interval.
 503          *
 504          * Transition back to acquisition mode if we are unable to maintain
 505          * this mode due to the correlation going bad.
 506          */
 507         if (info->lin_count >= LIN_RESTART / 2 &&
 508             fabs(info->lin_cache_corr) < 0.70
 509         ) {
 510             logdebuginfo(info, 2,
 511                 "polling mode MAINTAIN->ACQUIRE.  Unable to maintain\n"
 512                 "the maintenance mode because the correlation went"
 513                 " bad!\n");
 514             info->poll_mode = POLL_ACQUIRE;
 515             info->poll_count = 0;
 516             break;
 517         }
 518         info->poll_sleep = max_sleep_opt;
 519         break;
 520     case POLL_FAILED:
 521         /*
 522          * We have a communications failure.  A late recovery is possible
 523          * if we enter this state with a good poll.
 524          */
 525         if (info->poll_count != 0) {
 526             logdebuginfo(info, 2, "polling mode FAILED->ACQUIRE\n");
 527             if (info->poll_failed >= POLL_FAIL_RESET)
 528                 info->poll_mode = POLL_STARTUP;
 529             else
 530                 info->poll_mode = POLL_ACQUIRE;
 531             /* do not reset poll_count */
 532             break;
 533         }
 534
 535         /*
 536          * If we have been failed too long, disconnect from the server
 537          * and start us all over again.  Note that the failed count is not
 538          * reset to 0.
 539          */
 540         if (info->poll_failed >= POLL_RECOVERY_RESTART) {
 541             logdebuginfo(info, 2, "polling mode FAILED->INIT\n");
 542             client_setserverstate(info, 0, "FAILED");
 543             disconnect_server(info);
 544             info->poll_mode = POLL_FIXED;
 545             break;
 546         }
 547         break;
 548     }
 549
 550     /*
 551      * If the above state machine has not set a polling interval, set a
 552      * nominal polling interval.
 553      */
 554     if (info->poll_sleep == 0)
 555         info->poll_sleep = nom_sleep_opt;
 556 }
 557
 558 /*
 559  * Look for duplicate IP addresses.  This is done very inoften, so we do
 560  * not use a particularly efficient algorithm.
 561  *
 562  * Only reconnect a client which has not done its initial poll.
 563  */
 564 void
 565 client_check_duplicate_ips(struct server_info **info_ary, int count)
 566 {
 567     server_info_t info1;
 568     server_info_t info2;
 569     int tries;
 570     int i;
 571     int j;
 572
 573     for (i = 0; i < count; ++i) {
 574         info1 = info_ary[i];
 575         if (info1->fd < 0 || info1->server_state != 0)
 576             continue;
 577         for (tries = 0; tries < 10; ++tries) {
 578             for (j = 0; j < count; ++j) {
 579                 info2 = info_ary[j];
 580                 if (i == j || info2->fd < 0)
 581                     continue;
 582                 if (strcmp(info1->ipstr, info2->ipstr) == 0) {
 583                     reconnect_server(info1);
 584                     break;
 585                 }
 586             }
 587             if (j == count)
 588                 break;
 589         }
 590         if (tries == 10) {
 591             disconnect_server(info1);
 592             client_setserverstate(info1, -2,
 593                                   "permanently disabling duplicate server");
 594         }
 595     }
 596 }
 597
 598 /*
 599  * Calculate whether the server pointed to by *bestp is insane or not.
 600  * For some reason some servers in e.g. the ntp pool are sometimes an hour
 601  * off.  If we have at least three servers in the pool require that a
 602  * quorum agree that the current best server's offset is reasonable.
 603  *
 604  * Allow +/- 0.5 seconds of error for now (settable with option).
 605  *
 606  * Returns -1 if insane, 0 if not enough samples, and 1 if ok
 607  */
 608 static
 609 int
 610 client_insane(struct server_info **info_ary, int count, server_info_t best)
 611 {
 612     server_info_t info;
 613     double best_offset;
 614     double info_offset;
 615     int good;
 616     int bad;
 617     int skip;
 618     int quorum;
 619     int i;
 620
 621     /*
 622      * If only one ntp server we cannot check to see if it is insane
 623      */
 624     if (count < 2)
 625             return(1);
 626     best_offset = best->lin_sumoffset / best->lin_countoffset;
 627
 628     /*
 629      * Calculated the quorum.  Do not count permanently failed servers
 630      * in the calculation.
 631      *
 632      * adjusted count   quorum
 633      *   2                2
 634      *   3                2
 635      *   4                3
 636      *   5                3
 637      */
 638     quorum = count;
 639     for (i = 0; i < count; ++i) {
 640         info = info_ary[i];
 641         if (info->server_state == -2)
 642             --quorum;
 643     }
 644
 645     quorum = quorum / 2 + 1;
 646     good = 0;
 647     bad = 0;
 648     skip = 0;
 649
 650     /*
 651      * Find the good, the bad, and the ugly.  We need at least four samples
 652      * and a stddev within the deviation being checked to count a server
 653      * in the calculation.
 654      */
 655     for (i = 0; i < count; ++i) {
 656         info = info_ary[i];
 657         if (info->lin_countoffset < 4 ||
 658             info->lin_cache_stddev > insane_deviation
 659         ) {
 660             ++skip;
 661             continue;
 662         }
 663
 664         info_offset = info->lin_sumoffset / info->lin_countoffset;
 665         info_offset -= best_offset;
 666         if (info_offset < -insane_deviation || info_offset > insane_deviation)
 667                 ++bad;
 668         else
 669                 ++good;
 670     }
 671
 672     /*
 673      * Did we meet our quorum?
 674      */
 675     logdebuginfo(best, 5, "insanecheck good=%d bad=%d skip=%d "
 676                           "quorum=%d (allowed=%-+8.6f)\n",
 677                  good, bad, skip, quorum, insane_deviation);
 678     if (good >= quorum)
 679         return(1);
 680     if (good + skip >= quorum)
 681         return(0);
 682     return(-1);
 683 }
 684
 685 /*
 686  * Linear regression.
 687  *
 688  *      ltv     local time as of when the offset error was calculated between
 689  *              local time and remote time.
 690  *
 691  *      lbtv    base time as of when local time was obtained.  Used to
 692  *              calculate the cumulative corrections made to the system's
 693  *              real time clock so we can de-correct the offset for the
 694  *              linear regression.
 695  *
 696  * X is the time axis, in seconds.
 697  * Y is the uncorrected offset, in seconds.
 698  */
 699 void
 700 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv,
 701             double offset, int calc_offset_correction)
 702 {
 703     double time_axis;
 704     double uncorrected_offset;
 705
 706     /*
 707      * De-correcting the offset:
 708      *
 709      *  The passed offset is (our_real_time - remote_real_time).  To remove
 710      *  corrections from our_real_time we take the difference in the basetime
 711      *  (new_base_time - old_base_time) and subtract that from the offset.
 712      *  That is, if the basetime goesup, the uncorrected offset goes down.
 713      */
 714     if (info->lin_count == 0) {
 715         info->lin_tv = *ltv;
 716         info->lin_btv = *lbtv;
 717         time_axis = 0;
 718         uncorrected_offset = offset;
 719     } else {
 720         time_axis = tv_delta_double(&info->lin_tv, ltv);
 721         uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv);
 722     }
 723
 724     /*
 725      * We have to use the uncorrected offset for frequency calculations.
 726      */
 727     ++info->lin_count;
 728     info->lin_sumx += time_axis;
 729     info->lin_sumx2 += time_axis * time_axis;
 730     info->lin_sumy += uncorrected_offset;
 731     info->lin_sumy2 += uncorrected_offset * uncorrected_offset;
 732     info->lin_sumxy += time_axis * uncorrected_offset;
 733
 734     /*
 735      * We have to use the corrected offset for offset calculations.
 736      */
 737     if (calc_offset_correction) {
 738         ++info->lin_countoffset;
 739         info->lin_sumoffset += offset;
 740         info->lin_sumoffset2 += offset * offset;
 741     }
 742
 743     /*
 744      * Calculate various derived values.   This gets us slope, y-intercept,
 745      * and correlation from the linear regression.
 746      */
 747     if (info->lin_count > 1) {
 748         info->lin_cache_slope =
 749          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 750          (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx);
 751
 752         info->lin_cache_yint =
 753          (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) /
 754          (info->lin_count);
 755
 756         info->lin_cache_corr =
 757          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 758          sqrt((info->lin_count * info->lin_sumx2 -
 759                       info->lin_sumx * info->lin_sumx) *
 760              (info->lin_count * info->lin_sumy2 -
 761                       info->lin_sumy * info->lin_sumy)
 762          );
 763     }
 764
 765     /*
 766      * Calculate more derived values.  This gets us the standard-deviation
 767      * of offsets.  The standard deviation approximately means that 68%
 768      * of the samples fall within the calculated stddev of the mean.
 769      */
 770     if (info->lin_countoffset > 1) {
 771          info->lin_cache_stddev =
 772              sqrt((info->lin_sumoffset2 -
 773                  ((info->lin_sumoffset * info->lin_sumoffset /
 774                    info->lin_countoffset))) /
 775                  (info->lin_countoffset - 1.0));
 776     }
 777
 778     /*
 779      * Save the most recent offset, we might use it in the future.
 780      * Save the frequency correction (we might scale the slope later so
 781      * we have a separate field for the actual frequency correction in
 782      * seconds per second).
 783      */
 784     info->lin_cache_offset = offset;
 785     info->lin_cache_freq = info->lin_cache_slope;
 786
 787     if (debug_level >= 4) {
 788         logdebuginfo(info, 4, "iter=%2d time=%7.3f off=%+.6f uoff=%+.6f",
 789             (int)info->lin_count,
 790             time_axis, offset, uncorrected_offset);
 791         if (info->lin_count > 1) {
 792             logdebug(4, " slope %+7.6f"
 793                             " yint %+3.2f corr %+7.6f freq_ppm %+4.2f",
 794                 info->lin_cache_slope,
 795                 info->lin_cache_yint,
 796                 info->lin_cache_corr,
 797                 info->lin_cache_freq * 1000000.0);
 798         }
 799         if (info->lin_countoffset > 1) {
 800             logdebug(4, " stddev %7.6f", info->lin_cache_stddev);
 801         } else if (calc_offset_correction == 0) {
 802             /* cannot calculate offset correction due to prior correction */
 803             logdebug(4, " offset_ignored");
 804         }
 805         logdebug(4, "\n");
 806     }
 807 }
 808
 809 /*
 810  * Reset the linear regression data.  The info structure will not again be
 811  * a candidate for frequency or offset correction until sufficient data
 812  * has been accumulated to make a decision.
 813  */
 814 void
 815 lin_reset(server_info_t info)
 816 {
 817     server_info_t scan;
 818
 819     info->lin_count = 0;
 820     info->lin_sumx = 0;
 821     info->lin_sumy = 0;
 822     info->lin_sumxy = 0;
 823     info->lin_sumx2 = 0;
 824     info->lin_sumy2 = 0;
 825
 826     info->lin_countoffset = 0;
 827     info->lin_sumoffset = 0;
 828     info->lin_sumoffset2 = 0;
 829
 830     info->lin_cache_slope = 0;
 831     info->lin_cache_yint = 0;
 832     info->lin_cache_corr = 0;
 833     info->lin_cache_offset = 0;
 834     info->lin_cache_freq = 0;
 835
 836     /*
 837      * Destroy any additional alternative regressions.
 838      */
 839     while ((scan = info->altinfo) != NULL) {
 840         info->altinfo = scan->altinfo;
 841         free(scan);
 842     }
 843 }
 844
 845 /*
 846  * Sometimes we want to clean out the offset calculations without
 847  * destroying the linear regression used to figure out the frequency
 848  * correction.  This usually occurs whenever we issue an offset
 849  * adjustment to the system, which invalidates any offset data accumulated
 850  * up to that point.
 851  */
 852 void
 853 lin_resetalloffsets(struct server_info **info_ary, int count)
 854 {
 855     server_info_t info;
 856     int i;
 857
 858     for (i = 0; i < count; ++i) {
 859         for (info = info_ary[i]; info; info = info->altinfo)
 860             lin_resetoffsets(info);
 861     }
 862 }
 863
 864 void
 865 lin_resetoffsets(server_info_t info)
 866 {
 867     info->lin_countoffset = 0;
 868     info->lin_sumoffset = 0;
 869     info->lin_sumoffset2 = 0;
 870 }
 871
 872 void
 873 client_setserverstate(server_info_t info, int state, const char *str)
 874 {
 875     if (info->server_state != state) {
 876         info->server_state = state;
 877         logdebuginfo(info, 1, "%s\n", str);
 878     }
 879 }
 880