From 58f03414215dbb84bfd8875abe73b6521b83438b Mon Sep 17 00:00:00 2001 From: Christian Lohmaier Date: Fri, 3 Nov 2023 15:41:39 +0100 Subject: [PATCH] limit parallelism during msi packaging stage to avoid cscript failures with high parallelism there's a high risk of running into random failures when calling WiLangId.vbs via cscript. The limiter doesn't use make's jobserver since it is too easy to deadlock the build since all jobs are started at once, consuming all slots, but in addition all wait for an additional slot that never is made available because all jobs are blocked waiting.... All jobs being started at once and all jobs getting started from that point on getting put under the limiter's control makes this simple approach with separate grab/release calls possible. If they were spread out the semaphore wouldn't be available (gets closed/removed as soon as nothing waits for it anymore) Change-Id: I345f2904a1d7e8989720722415fb51282ab3b05b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158886 Tested-by: Jenkins Reviewed-by: Christian Lohmaier (cherry picked from commit edf6c155b0b22cbff2d255796c0541b644a5f245) Fix typo Change-Id: I84186bee245a95a74e92c974ca94bb81c31ee1ac Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158950 Tested-by: Jenkins Reviewed-by: Julien Nabet (cherry picked from commit 0053dfbabd8b6711ea7e9354e5792048931daf7c) Fix typo Change-Id: Ie41ca6c56bf44b04bd2d65b6cb64594d66295f24 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158951 Tested-by: Jenkins Reviewed-by: Julien Nabet (cherry picked from commit 24e1d6dbd27fa6c37723b55bbab5f5b5cce279f2) msi packaging job-limiter: use CXX_FOR_BUILD since it is a build-tool it needs to match the arch of the platform the build is performed, already had used ILIB_FOR_BUILD, but CXX was missed and broke the build for the aarch64 daily tinderbox Change-Id: Ie41ca6c56bf44b04bd2d65b6cb64594d66295f24 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158993 Tested-by: Jenkins Reviewed-by: Thorsten Behrens --- instsetoo_native/CustomTarget_install.mk | 16 ++++- solenv/bin/job-limiter.cpp | 116 +++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 solenv/bin/job-limiter.cpp diff --git a/instsetoo_native/CustomTarget_install.mk b/instsetoo_native/CustomTarget_install.mk index f05b771fa107..aa18e827fda9 100644 --- a/instsetoo_native/CustomTarget_install.mk +++ b/instsetoo_native/CustomTarget_install.mk @@ -95,8 +95,19 @@ $(call gb_CustomTarget_get_workdir,instsetoo_native/install)/msi_templates/%/Bin $(call gb_Output_announce,setting up msi templates for type $* - copying binary assets,$(true),CPY,4) rm -rf $@ && mkdir -p $@ && cd $@ && cp $(SRCDIR)/instsetoo_native/inc_common/windows/msi_templates/Binary/*.* ./ +gb_Make_JobLimiter := $(WORKDIR)/job-limiter.exe + +$(gb_Make_JobLimiter): $(SRCDIR)/solenv/bin/job-limiter.cpp + cd $(WORKDIR) && \ + $(CXX_FOR_BUILD) $(SOLARINC) -EHsc $^ -link -LIBPATH:$(subst ;, -LIBPATH:,$(ILIB_FOR_BUILD)) || rm -f $@ + # with all languages the logfile name would be too long when building the windows installation set, # that's the reason for the substitution to multilang below in case more than just en-US is packaged +# also for windows msi packaging parallel execution is reduced by the job-limiter. This only has any +# effect when building with help and multiple languages, and it also won't affect the real time for +# packaging (since packaging the main installer takes longer than packaging sdk and all helppacks +# even with the reduced parallelism (the higher the parallelism, the higher the chance for random +# failures during the cscript call to WiLangId.vbs) $(instsetoo_installer_targets): $(SRCDIR)/solenv/bin/make_installer.pl \ $(foreach ulf,$(instsetoo_ULFLIST),$(call gb_CustomTarget_get_workdir,instsetoo_native/install)/win_ulffiles/$(ulf).ulf) \ $(if $(filter-out WNT,$(OS)),\ @@ -104,12 +115,15 @@ $(instsetoo_installer_targets): $(SRCDIR)/solenv/bin/make_installer.pl \ bin/find-requires-gnome.sh \ bin/find-requires-x11.sh) \ ,instsetoo_msi_templates) \ - $(call gb_Postprocess_get_target,AllModulesButInstsetNative) | instsetoo_wipe + $(call gb_Postprocess_get_target,AllModulesButInstsetNative) \ + | instsetoo_wipe $(if $(filter msi,$(PKGFORMAT)),$(gb_Make_JobLimiter)) $(call gb_Output_announce,$(if $(filter en-US$(COMMA)%,$(instsetoo_installer_langs)),$(subst $(instsetoo_installer_langs),multilang,$@),$@),$(true),INS,1) + $(if $(filter %msi‧nostrip,$@),$(gb_Make_JobLimiter) grab) $(call gb_Trace_StartRange,$@,INSTALLER) $(call gb_Helper_print_on_error, \ $(SRCDIR)/solenv/bin/call_installer.sh $(if $(verbose),-verbose,-quiet) $(subst ‧,:,$@),\ $(call gb_CustomTarget_get_workdir,instsetoo_native/install)/$(if $(filter en-US$(COMMA)%,$(instsetoo_installer_langs)),$(subst $(instsetoo_installer_langs),multilang,$@),$@).log) + $(if $(filter %msi‧nostrip,$@),$(gb_Make_JobLimiter) release) $(call gb_Trace_EndRange,$@,INSTALLER) $(call gb_CustomTarget_get_workdir,instsetoo_native/install)/install.phony: $(instsetoo_installer_targets) diff --git a/solenv/bin/job-limiter.cpp b/solenv/bin/job-limiter.cpp new file mode 100644 index 000000000000..9111ead35127 --- /dev/null +++ b/solenv/bin/job-limiter.cpp @@ -0,0 +1,116 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +// Helper to reduce parallelism, specifically for msi installset packaging +// +// Ideally it would hook into make's jobserver, but it is too easy to deadlock the build at the +// packaging stage (all jobs are started at once, consuming all available slots as part of general +// parallelism, and then each job waiting for an additional job token but since all jobs are in +// waiting stage: you have a deadlock). +// That in turn is an advantage for a simple approach with separate lock and release commands, since +// everything is started at once, and all jobs that will be queued are all jobs that use the helper, +// there will be jobs in waiting state and keeping the semaphore active as long as it matters. + +#include +#include + +using namespace std; + +int grab(HANDLE& semaphore_handle) +{ + DWORD waitresult = WaitForSingleObject(semaphore_handle, INFINITE); + + switch (waitresult) + { + case WAIT_OBJECT_0: + return 0; + case WAIT_TIMEOUT: + fprintf(stderr, "grabbing a slot failed with timeout despite waiting for INFINITE?\n"); + break; + case WAIT_FAILED: + fprintf(stderr, "grabbing a slot failed with error: %d\n", GetLastError()); + break; + default: + fprintf(stderr, "grabbing a slot failed with status %d - error %d", waitresult, + GetLastError()); + } + return 1; +} + +int release(HANDLE& semaphore_handle) +{ + if (ReleaseSemaphore(semaphore_handle, 1, NULL)) + { + return 0; + } + else + { + fprintf(stderr, "something went wrong trying to release a slot: %d\n", GetLastError()); + return 1; + } +} + +int wmain(int argc, wchar_t* argv[]) +{ + if (argc != 2) + { + fprintf(stderr, "Invalid number of arguments!\n"); + fprintf(stderr, "call with 'grab' or 'release'\n"); + exit(1); + } + // since the reason for limiting the parallelism is the instability with cscript/WiLangId.vbs + // calls and high parallelism, not having that unique/potentially sharing the limit semaphore + // with another build is not a problem but considered a feature... + LPCWSTR semaphorename = L"lo_installset_limiter"; + HANDLE semaphore_handle + = OpenSemaphoreW(SYNCHRONIZE | SEMAPHORE_MODIFY_STATE, false, semaphorename); + if (semaphore_handle == NULL) + { + fprintf(stdout, "no existing semaphore, creating new one\n"); + // Need to have a buffer slot, since the helper isn't persistent. + // When it is called to release a slot it might be that all holders of the semaphore + // are gone already and thus the semaphore also gets closed and is no longer present on the + // system. So when it creates a new one to use, and then releases one it would hit the max + // limit otherwise. This only happens when nothing else is waiting for a slot anymore, + // i.e. when there are already fewer jobs than imposed by the limiter. + // A limit of four (main installer and 3 controlled by this limiter) was chosen because that + // won't affect overall build time (creating the main installer with multiple languages + // takes much longer than all the helppackages and the single sdk package combined, even + // when those are limited to three jobs), and seems to be low enough to avoid the random + // cscript/WiLangId.vbs failures. + semaphore_handle = CreateSemaphoreW(NULL, 3, 4, semaphorename); + // keep this process alive for other jobs to grab the semaphore, otherwise it is gone too + // quickly and everything creates their own semaphore that immediately has enough slots, + // completely bypassing the point of having a limiter... + Sleep(500); + } + if (semaphore_handle == NULL) + { + fprintf(stderr, "Error creating/opening the semaphore - bailing out (%d)\n", + GetLastError()); + exit(1); + } + + if (wcscmp(argv[1], L"grab") == 0) + { + exit(grab(semaphore_handle)); + } + else if (wcscmp(argv[1], L"release") == 0) + { + exit(release(semaphore_handle)); + } + else + { + fwprintf(stderr, L"invalid action '%s'\nSupported actions are 'grab' or 'release'\n", + argv[1]); + } + exit(1); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ -- 2.11.4.GIT