parallel-checkout.c

   1 #include "cache.h"
   2 #include "config.h"
   3 #include "entry.h"
   4 #include "parallel-checkout.h"
   5 #include "pkt-line.h"
   6 #include "progress.h"
   7 #include "run-command.h"
   8 #include "sigchain.h"
   9 #include "streaming.h"
  10 #include "thread-utils.h"
  11
  12 struct pc_worker {
  13         struct child_process cp;
  14         size_t next_item_to_complete, nr_items_to_complete;
  15 };
  16
  17 struct parallel_checkout {
  18         enum pc_status status;
  19         struct parallel_checkout_item *items; /* The parallel checkout queue. */
  20         size_t nr, alloc;
  21         struct progress *progress;
  22         unsigned int *progress_cnt;
  23 };
  24
  25 static struct parallel_checkout parallel_checkout;
  26
  27 enum pc_status parallel_checkout_status(void)
  28 {
  29         return parallel_checkout.status;
  30 }
  31
  32 static const int DEFAULT_THRESHOLD_FOR_PARALLELISM = 100;
  33 static const int DEFAULT_NUM_WORKERS = 1;
  34
  35 void get_parallel_checkout_configs(int *num_workers, int *threshold)
  36 {
  37         if (git_config_get_int("checkout.workers", num_workers))
  38                 *num_workers = DEFAULT_NUM_WORKERS;
  39         else if (*num_workers < 1)
  40                 *num_workers = online_cpus();
  41
  42         if (git_config_get_int("checkout.thresholdForParallelism", threshold))
  43                 *threshold = DEFAULT_THRESHOLD_FOR_PARALLELISM;
  44 }
  45
  46 void init_parallel_checkout(void)
  47 {
  48         if (parallel_checkout.status != PC_UNINITIALIZED)
  49                 BUG("parallel checkout already initialized");
  50
  51         parallel_checkout.status = PC_ACCEPTING_ENTRIES;
  52 }
  53
  54 static void finish_parallel_checkout(void)
  55 {
  56         if (parallel_checkout.status == PC_UNINITIALIZED)
  57                 BUG("cannot finish parallel checkout: not initialized yet");
  58
  59         free(parallel_checkout.items);
  60         memset(&parallel_checkout, 0, sizeof(parallel_checkout));
  61 }
  62
  63 static int is_eligible_for_parallel_checkout(const struct cache_entry *ce,
  64                                              const struct conv_attrs *ca)
  65 {
  66         enum conv_attrs_classification c;
  67         size_t packed_item_size;
  68
  69         /*
  70          * Symlinks cannot be checked out in parallel as, in case of path
  71          * collision, they could racily replace leading directories of other
  72          * entries being checked out. Submodules are checked out in child
  73          * processes, which have their own parallel checkout queues.
  74          */
  75         if (!S_ISREG(ce->ce_mode))
  76                 return 0;
  77
  78         packed_item_size = sizeof(struct pc_item_fixed_portion) + ce->ce_namelen +
  79                 (ca->working_tree_encoding ? strlen(ca->working_tree_encoding) : 0);
  80
  81         /*
  82          * The amount of data we send to the workers per checkout item is
  83          * typically small (75~300B). So unless we find an insanely huge path
  84          * of 64KB, we should never reach the 65KB limit of one pkt-line. If
  85          * that does happen, we let the sequential code handle the item.
  86          */
  87         if (packed_item_size > LARGE_PACKET_DATA_MAX)
  88                 return 0;
  89
  90         c = classify_conv_attrs(ca);
  91         switch (c) {
  92         case CA_CLASS_INCORE:
  93                 return 1;
  94
  95         case CA_CLASS_INCORE_FILTER:
  96                 /*
  97                  * It would be safe to allow concurrent instances of
  98                  * single-file smudge filters, like rot13, but we should not
  99                  * assume that all filters are parallel-process safe. So we
 100                  * don't allow this.
 101                  */
 102                 return 0;
 103
 104         case CA_CLASS_INCORE_PROCESS:
 105                 /*
 106                  * The parallel queue and the delayed queue are not compatible,
 107                  * so they must be kept completely separated. And we can't tell
 108                  * if a long-running process will delay its response without
 109                  * actually asking it to perform the filtering. Therefore, this
 110                  * type of filter is not allowed in parallel checkout.
 111                  *
 112                  * Furthermore, there should only be one instance of the
 113                  * long-running process filter as we don't know how it is
 114                  * managing its own concurrency. So, spreading the entries that
 115                  * requisite such a filter among the parallel workers would
 116                  * require a lot more inter-process communication. We would
 117                  * probably have to designate a single process to interact with
 118                  * the filter and send all the necessary data to it, for each
 119                  * entry.
 120                  */
 121                 return 0;
 122
 123         case CA_CLASS_STREAMABLE:
 124                 return 1;
 125
 126         default:
 127                 BUG("unsupported conv_attrs classification '%d'", c);
 128         }
 129 }
 130
 131 int enqueue_checkout(struct cache_entry *ce, struct conv_attrs *ca)
 132 {
 133         struct parallel_checkout_item *pc_item;
 134
 135         if (parallel_checkout.status != PC_ACCEPTING_ENTRIES ||
 136             !is_eligible_for_parallel_checkout(ce, ca))
 137                 return -1;
 138
 139         ALLOC_GROW(parallel_checkout.items, parallel_checkout.nr + 1,
 140                    parallel_checkout.alloc);
 141
 142         pc_item = &parallel_checkout.items[parallel_checkout.nr];
 143         pc_item->ce = ce;
 144         memcpy(&pc_item->ca, ca, sizeof(pc_item->ca));
 145         pc_item->status = PC_ITEM_PENDING;
 146         pc_item->id = parallel_checkout.nr;
 147         parallel_checkout.nr++;
 148
 149         return 0;
 150 }
 151
 152 size_t pc_queue_size(void)
 153 {
 154         return parallel_checkout.nr;
 155 }
 156
 157 static void advance_progress_meter(void)
 158 {
 159         if (parallel_checkout.progress) {
 160                 (*parallel_checkout.progress_cnt)++;
 161                 display_progress(parallel_checkout.progress,
 162                                  *parallel_checkout.progress_cnt);
 163         }
 164 }
 165
 166 static int handle_results(struct checkout *state)
 167 {
 168         int ret = 0;
 169         size_t i;
 170         int have_pending = 0;
 171
 172         /*
 173          * We first update the successfully written entries with the collected
 174          * stat() data, so that they can be found by mark_colliding_entries(),
 175          * in the next loop, when necessary.
 176          */
 177         for (i = 0; i < parallel_checkout.nr; i++) {
 178                 struct parallel_checkout_item *pc_item = &parallel_checkout.items[i];
 179                 if (pc_item->status == PC_ITEM_WRITTEN)
 180                         update_ce_after_write(state, pc_item->ce, &pc_item->st);
 181         }
 182
 183         for (i = 0; i < parallel_checkout.nr; i++) {
 184                 struct parallel_checkout_item *pc_item = &parallel_checkout.items[i];
 185
 186                 switch(pc_item->status) {
 187                 case PC_ITEM_WRITTEN:
 188                         /* Already handled */
 189                         break;
 190                 case PC_ITEM_COLLIDED:
 191                         /*
 192                          * The entry could not be checked out due to a path
 193                          * collision with another entry. Since there can only
 194                          * be one entry of each colliding group on the disk, we
 195                          * could skip trying to check out this one and move on.
 196                          * However, this would leave the unwritten entries with
 197                          * null stat() fields on the index, which could
 198                          * potentially slow down subsequent operations that
 199                          * require refreshing it: git would not be able to
 200                          * trust st_size and would have to go to the filesystem
 201                          * to see if the contents match (see ie_modified()).
 202                          *
 203                          * Instead, let's pay the overhead only once, now, and
 204                          * call checkout_entry_ca() again for this file, to
 205                          * have its stat() data stored in the index. This also
 206                          * has the benefit of adding this entry and its
 207                          * colliding pair to the collision report message.
 208                          * Additionally, this overwriting behavior is consistent
 209                          * with what the sequential checkout does, so it doesn't
 210                          * add any extra overhead.
 211                          */
 212                         ret |= checkout_entry_ca(pc_item->ce, &pc_item->ca,
 213                                                  state, NULL, NULL);
 214                         advance_progress_meter();
 215                         break;
 216                 case PC_ITEM_PENDING:
 217                         have_pending = 1;
 218                         /* fall through */
 219                 case PC_ITEM_FAILED:
 220                         ret = -1;
 221                         break;
 222                 default:
 223                         BUG("unknown checkout item status in parallel checkout");
 224                 }
 225         }
 226
 227         if (have_pending)
 228                 error("parallel checkout finished with pending entries");
 229
 230         return ret;
 231 }
 232
 233 static int reset_fd(int fd, const char *path)
 234 {
 235         if (lseek(fd, 0, SEEK_SET) != 0)
 236                 return error_errno("failed to rewind descriptor of '%s'", path);
 237         if (ftruncate(fd, 0))
 238                 return error_errno("failed to truncate file '%s'", path);
 239         return 0;
 240 }
 241
 242 static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
 243                                const char *path)
 244 {
 245         int ret;
 246         struct stream_filter *filter;
 247         struct strbuf buf = STRBUF_INIT;
 248         char *blob;
 249         unsigned long size;
 250         ssize_t wrote;
 251
 252         /* Sanity check */
 253         assert(is_eligible_for_parallel_checkout(pc_item->ce, &pc_item->ca));
 254
 255         filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid);
 256         if (filter) {
 257                 if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) {
 258                         /* On error, reset fd to try writing without streaming */
 259                         if (reset_fd(fd, path))
 260                                 return -1;
 261                 } else {
 262                         return 0;
 263                 }
 264         }
 265
 266         blob = read_blob_entry(pc_item->ce, &size);
 267         if (!blob)
 268                 return error("cannot read object %s '%s'",
 269                              oid_to_hex(&pc_item->ce->oid), pc_item->ce->name);
 270
 271         /*
 272          * checkout metadata is used to give context for external process
 273          * filters. Files requiring such filters are not eligible for parallel
 274          * checkout, so pass NULL. Note: if that changes, the metadata must also
 275          * be passed from the main process to the workers.
 276          */
 277         ret = convert_to_working_tree_ca(&pc_item->ca, pc_item->ce->name,
 278                                          blob, size, &buf, NULL);
 279
 280         if (ret) {
 281                 size_t newsize;
 282                 free(blob);
 283                 blob = strbuf_detach(&buf, &newsize);
 284                 size = newsize;
 285         }
 286
 287         wrote = write_in_full(fd, blob, size);
 288         free(blob);
 289         if (wrote < 0)
 290                 return error("unable to write file '%s'", path);
 291
 292         return 0;
 293 }
 294
 295 static int close_and_clear(int *fd)
 296 {
 297         int ret = 0;
 298
 299         if (*fd >= 0) {
 300                 ret = close(*fd);
 301                 *fd = -1;
 302         }
 303
 304         return ret;
 305 }
 306
 307 void write_pc_item(struct parallel_checkout_item *pc_item,
 308                    struct checkout *state)
 309 {
 310         unsigned int mode = (pc_item->ce->ce_mode & 0100) ? 0777 : 0666;
 311         int fd = -1, fstat_done = 0;
 312         struct strbuf path = STRBUF_INIT;
 313         const char *dir_sep;
 314
 315         strbuf_add(&path, state->base_dir, state->base_dir_len);
 316         strbuf_add(&path, pc_item->ce->name, pc_item->ce->ce_namelen);
 317
 318         dir_sep = find_last_dir_sep(path.buf);
 319
 320         /*
 321          * The leading dirs should have been already created by now. But, in
 322          * case of path collisions, one of the dirs could have been replaced by
 323          * a symlink (checked out after we enqueued this entry for parallel
 324          * checkout). Thus, we must check the leading dirs again.
 325          */
 326         if (dir_sep && !has_dirs_only_path(path.buf, dir_sep - path.buf,
 327                                            state->base_dir_len)) {
 328                 pc_item->status = PC_ITEM_COLLIDED;
 329                 goto out;
 330         }
 331
 332         fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode);
 333
 334         if (fd < 0) {
 335                 if (errno == EEXIST || errno == EISDIR) {
 336                         /*
 337                          * Errors which probably represent a path collision.
 338                          * Suppress the error message and mark the item to be
 339                          * retried later, sequentially. ENOTDIR and ENOENT are
 340                          * also interesting, but the above has_dirs_only_path()
 341                          * call should have already caught these cases.
 342                          */
 343                         pc_item->status = PC_ITEM_COLLIDED;
 344                 } else {
 345                         error_errno("failed to open file '%s'", path.buf);
 346                         pc_item->status = PC_ITEM_FAILED;
 347                 }
 348                 goto out;
 349         }
 350
 351         if (write_pc_item_to_fd(pc_item, fd, path.buf)) {
 352                 /* Error was already reported. */
 353                 pc_item->status = PC_ITEM_FAILED;
 354                 close_and_clear(&fd);
 355                 unlink(path.buf);
 356                 goto out;
 357         }
 358
 359         fstat_done = fstat_checkout_output(fd, state, &pc_item->st);
 360
 361         if (close_and_clear(&fd)) {
 362                 error_errno("unable to close file '%s'", path.buf);
 363                 pc_item->status = PC_ITEM_FAILED;
 364                 goto out;
 365         }
 366
 367         if (state->refresh_cache && !fstat_done && lstat(path.buf, &pc_item->st) < 0) {
 368                 error_errno("unable to stat just-written file '%s'",  path.buf);
 369                 pc_item->status = PC_ITEM_FAILED;
 370                 goto out;
 371         }
 372
 373         pc_item->status = PC_ITEM_WRITTEN;
 374
 375 out:
 376         strbuf_release(&path);
 377 }
 378
 379 static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
 380 {
 381         size_t len_data;
 382         char *data, *variant;
 383         struct pc_item_fixed_portion *fixed_portion;
 384         const char *working_tree_encoding = pc_item->ca.working_tree_encoding;
 385         size_t name_len = pc_item->ce->ce_namelen;
 386         size_t working_tree_encoding_len = working_tree_encoding ?
 387                                            strlen(working_tree_encoding) : 0;
 388
 389         /*
 390          * Any changes in the calculation of the message size must also be made
 391          * in is_eligible_for_parallel_checkout().
 392          */
 393         len_data = sizeof(struct pc_item_fixed_portion) + name_len +
 394                    working_tree_encoding_len;
 395
 396         data = xcalloc(1, len_data);
 397
 398         fixed_portion = (struct pc_item_fixed_portion *)data;
 399         fixed_portion->id = pc_item->id;
 400         fixed_portion->ce_mode = pc_item->ce->ce_mode;
 401         fixed_portion->crlf_action = pc_item->ca.crlf_action;
 402         fixed_portion->ident = pc_item->ca.ident;
 403         fixed_portion->name_len = name_len;
 404         fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
 405         /*
 406          * We use hashcpy() instead of oidcpy() because the hash[] positions
 407          * after `the_hash_algo->rawsz` might not be initialized. And Valgrind
 408          * would complain about passing uninitialized bytes to a syscall
 409          * (write(2)). There is no real harm in this case, but the warning could
 410          * hinder the detection of actual errors.
 411          */
 412         hashcpy(fixed_portion->oid.hash, pc_item->ce->oid.hash);
 413
 414         variant = data + sizeof(*fixed_portion);
 415         if (working_tree_encoding_len) {
 416                 memcpy(variant, working_tree_encoding, working_tree_encoding_len);
 417                 variant += working_tree_encoding_len;
 418         }
 419         memcpy(variant, pc_item->ce->name, name_len);
 420
 421         packet_write(fd, data, len_data);
 422
 423         free(data);
 424 }
 425
 426 static void send_batch(int fd, size_t start, size_t nr)
 427 {
 428         size_t i;
 429         sigchain_push(SIGPIPE, SIG_IGN);
 430         for (i = 0; i < nr; i++)
 431                 send_one_item(fd, &parallel_checkout.items[start + i]);
 432         packet_flush(fd);
 433         sigchain_pop(SIGPIPE);
 434 }
 435
 436 static struct pc_worker *setup_workers(struct checkout *state, int num_workers)
 437 {
 438         struct pc_worker *workers;
 439         int i, workers_with_one_extra_item;
 440         size_t base_batch_size, batch_beginning = 0;
 441
 442         ALLOC_ARRAY(workers, num_workers);
 443
 444         for (i = 0; i < num_workers; i++) {
 445                 struct child_process *cp = &workers[i].cp;
 446
 447                 child_process_init(cp);
 448                 cp->git_cmd = 1;
 449                 cp->in = -1;
 450                 cp->out = -1;
 451                 cp->clean_on_exit = 1;
 452                 strvec_push(&cp->args, "checkout--worker");
 453                 if (state->base_dir_len)
 454                         strvec_pushf(&cp->args, "--prefix=%s", state->base_dir);
 455                 if (start_command(cp))
 456                         die("failed to spawn checkout worker");
 457         }
 458
 459         base_batch_size = parallel_checkout.nr / num_workers;
 460         workers_with_one_extra_item = parallel_checkout.nr % num_workers;
 461
 462         for (i = 0; i < num_workers; i++) {
 463                 struct pc_worker *worker = &workers[i];
 464                 size_t batch_size = base_batch_size;
 465
 466                 /* distribute the extra work evenly */
 467                 if (i < workers_with_one_extra_item)
 468                         batch_size++;
 469
 470                 send_batch(worker->cp.in, batch_beginning, batch_size);
 471                 worker->next_item_to_complete = batch_beginning;
 472                 worker->nr_items_to_complete = batch_size;
 473
 474                 batch_beginning += batch_size;
 475         }
 476
 477         return workers;
 478 }
 479
 480 static void finish_workers(struct pc_worker *workers, int num_workers)
 481 {
 482         int i;
 483
 484         /*
 485          * Close pipes before calling finish_command() to let the workers
 486          * exit asynchronously and avoid spending extra time on wait().
 487          */
 488         for (i = 0; i < num_workers; i++) {
 489                 struct child_process *cp = &workers[i].cp;
 490                 if (cp->in >= 0)
 491                         close(cp->in);
 492                 if (cp->out >= 0)
 493                         close(cp->out);
 494         }
 495
 496         for (i = 0; i < num_workers; i++) {
 497                 int rc = finish_command(&workers[i].cp);
 498                 if (rc > 128) {
 499                         /*
 500                          * For a normal non-zero exit, the worker should have
 501                          * already printed something useful to stderr. But a
 502                          * death by signal should be mentioned to the user.
 503                          */
 504                         error("checkout worker %d died of signal %d", i, rc - 128);
 505                 }
 506         }
 507
 508         free(workers);
 509 }
 510
 511 static inline void assert_pc_item_result_size(int got, int exp)
 512 {
 513         if (got != exp)
 514                 BUG("wrong result size from checkout worker (got %dB, exp %dB)",
 515                     got, exp);
 516 }
 517
 518 static void parse_and_save_result(const char *buffer, int len,
 519                                   struct pc_worker *worker)
 520 {
 521         struct pc_item_result *res;
 522         struct parallel_checkout_item *pc_item;
 523         struct stat *st = NULL;
 524
 525         if (len < PC_ITEM_RESULT_BASE_SIZE)
 526                 BUG("too short result from checkout worker (got %dB, exp >=%dB)",
 527                     len, (int)PC_ITEM_RESULT_BASE_SIZE);
 528
 529         res = (struct pc_item_result *)buffer;
 530
 531         /*
 532          * Worker should send either the full result struct on success, or
 533          * just the base (i.e. no stat data), otherwise.
 534          */
 535         if (res->status == PC_ITEM_WRITTEN) {
 536                 assert_pc_item_result_size(len, (int)sizeof(struct pc_item_result));
 537                 st = &res->st;
 538         } else {
 539                 assert_pc_item_result_size(len, (int)PC_ITEM_RESULT_BASE_SIZE);
 540         }
 541
 542         if (!worker->nr_items_to_complete)
 543                 BUG("received result from supposedly finished checkout worker");
 544         if (res->id != worker->next_item_to_complete)
 545                 BUG("unexpected item id from checkout worker (got %"PRIuMAX", exp %"PRIuMAX")",
 546                     (uintmax_t)res->id, (uintmax_t)worker->next_item_to_complete);
 547
 548         worker->next_item_to_complete++;
 549         worker->nr_items_to_complete--;
 550
 551         pc_item = &parallel_checkout.items[res->id];
 552         pc_item->status = res->status;
 553         if (st)
 554                 pc_item->st = *st;
 555
 556         if (res->status != PC_ITEM_COLLIDED)
 557                 advance_progress_meter();
 558 }
 559
 560 static void gather_results_from_workers(struct pc_worker *workers,
 561                                         int num_workers)
 562 {
 563         int i, active_workers = num_workers;
 564         struct pollfd *pfds;
 565
 566         CALLOC_ARRAY(pfds, num_workers);
 567         for (i = 0; i < num_workers; i++) {
 568                 pfds[i].fd = workers[i].cp.out;
 569                 pfds[i].events = POLLIN;
 570         }
 571
 572         while (active_workers) {
 573                 int nr = poll(pfds, num_workers, -1);
 574
 575                 if (nr < 0) {
 576                         if (errno == EINTR)
 577                                 continue;
 578                         die_errno("failed to poll checkout workers");
 579                 }
 580
 581                 for (i = 0; i < num_workers && nr > 0; i++) {
 582                         struct pc_worker *worker = &workers[i];
 583                         struct pollfd *pfd = &pfds[i];
 584
 585                         if (!pfd->revents)
 586                                 continue;
 587
 588                         if (pfd->revents & POLLIN) {
 589                                 int len = packet_read(pfd->fd, NULL, NULL,
 590                                                       packet_buffer,
 591                                                       sizeof(packet_buffer), 0);
 592
 593                                 if (len < 0) {
 594                                         BUG("packet_read() returned negative value");
 595                                 } else if (!len) {
 596                                         pfd->fd = -1;
 597                                         active_workers--;
 598                                 } else {
 599                                         parse_and_save_result(packet_buffer,
 600                                                               len, worker);
 601                                 }
 602                         } else if (pfd->revents & POLLHUP) {
 603                                 pfd->fd = -1;
 604                                 active_workers--;
 605                         } else if (pfd->revents & (POLLNVAL | POLLERR)) {
 606                                 die("error polling from checkout worker");
 607                         }
 608
 609                         nr--;
 610                 }
 611         }
 612
 613         free(pfds);
 614 }
 615
 616 static void write_items_sequentially(struct checkout *state)
 617 {
 618         size_t i;
 619
 620         for (i = 0; i < parallel_checkout.nr; i++) {
 621                 struct parallel_checkout_item *pc_item = &parallel_checkout.items[i];
 622                 write_pc_item(pc_item, state);
 623                 if (pc_item->status != PC_ITEM_COLLIDED)
 624                         advance_progress_meter();
 625         }
 626 }
 627
 628 int run_parallel_checkout(struct checkout *state, int num_workers, int threshold,
 629                           struct progress *progress, unsigned int *progress_cnt)
 630 {
 631         int ret;
 632
 633         if (parallel_checkout.status != PC_ACCEPTING_ENTRIES)
 634                 BUG("cannot run parallel checkout: uninitialized or already running");
 635
 636         parallel_checkout.status = PC_RUNNING;
 637         parallel_checkout.progress = progress;
 638         parallel_checkout.progress_cnt = progress_cnt;
 639
 640         if (parallel_checkout.nr < num_workers)
 641                 num_workers = parallel_checkout.nr;
 642
 643         if (num_workers <= 1 || parallel_checkout.nr < threshold) {
 644                 write_items_sequentially(state);
 645         } else {
 646                 struct pc_worker *workers = setup_workers(state, num_workers);
 647                 gather_results_from_workers(workers, num_workers);
 648                 finish_workers(workers, num_workers);
 649         }
 650
 651         ret = handle_results(state);
 652
 653         finish_parallel_checkout();
 654         return ret;
 655 }