From 7fa706a316f043068e4d37a7d512bd44295e14b0 Mon Sep 17 00:00:00 2001 From: Johan Lorensson Date: Mon, 16 Sep 2019 02:51:07 +0200 Subject: [PATCH] Fix infrequent hangs in test-runner. (#16793) * Fix infrequent hangs in test-runner. test-runner has been plagued with very infrequent hangs causing long timeouts on CI. Turns out that there is a race in AsyncStreamReader used by Process class for handling stdout/stderr when closing down process. Since AsyncStreamReader::Dispose didn't make sure no pending async read requests were still in flight before closing the stream and underlying handle, it led to a race condition deadlocking the complete process, hanging test-runner. A similar race has also been observed but instead of causing a deadlock, it could also manifest as an System.ObjectDisposedException when doing EndRead while another thread is disposing the underlying stream. Hitting that race didn't deadlock the process but failed to complete invoke of test-runner. Fix is to better protect the async callback and the thread doing the dispose call. In order to make sure we don't close underlying handle while still having async reads in flight, fix also checks async result for completion and if not completed when trying to dispose AsyncStreamReader it will try to cancel outstanding pending IO and wait for completion before closing the stream. Since the issue has only been seen on Windows, the fix will try to cancel pending IO and wait for completion before closing stream on Windows. On other platforms, old behavior will be preserved, meaning that the implementation will still be vunarable to race between pending reads and close of stream, but since problem has not been observed on other platforms and since there is no good way of cancel outstanding blocking reads, old behavior is kept. At some point in future we should look into replacing the implementation of AsyncStreamReader and use cancelation tokens, but since we don't have all corefx support around cancelation in Mono BCL, that currently won't solve the issue, meaning that we need to bring in more classes. Since the problem also occur in test-runner using the build BCL profile, even more needs to be added to that profile in order to get similar support. But due to the infrequency of the problem and that it has only been observed on Windows, the isolated fix seems more reasonable. * Build error fix. * Review feedback. * Fix type cast not working on linux builds. * Adjust icall naming in icall-def.h. --- mcs/class/corlib/System.IO/MonoIO.cs | 15 ++++++ mcs/class/corlib/System.IO/MonoIOError.cs | 4 +- .../system/diagnosticts/AsyncStreamReader.cs | 61 +++++++++++++++++----- mono/metadata/icall-def.h | 3 +- mono/metadata/w32file-unix.c | 7 +++ mono/metadata/w32file-win32.c | 6 +++ mono/metadata/w32file.c | 13 +++++ mono/metadata/w32file.h | 8 +++ 8 files changed, 100 insertions(+), 17 deletions(-) diff --git a/mcs/class/corlib/System.IO/MonoIO.cs b/mcs/class/corlib/System.IO/MonoIO.cs index 3567809b9da..46774397bcd 100644 --- a/mcs/class/corlib/System.IO/MonoIO.cs +++ b/mcs/class/corlib/System.IO/MonoIO.cs @@ -434,6 +434,21 @@ namespace System.IO } [MethodImplAttribute (MethodImplOptions.InternalCall)] + private extern static bool Cancel_internal (IntPtr handle, out MonoIOError error); + + internal static bool Cancel (SafeHandle safeHandle, out MonoIOError error) + { + bool release = false; + try { + safeHandle.DangerousAddRef (ref release); + return Cancel_internal (safeHandle.DangerousGetHandle (), out error); + } finally { + if (release) + safeHandle.DangerousRelease (); + } + } + + [MethodImplAttribute (MethodImplOptions.InternalCall)] public extern static bool Close (IntPtr handle, out MonoIOError error); diff --git a/mcs/class/corlib/System.IO/MonoIOError.cs b/mcs/class/corlib/System.IO/MonoIOError.cs index 2a78687ef36..897ab869da8 100644 --- a/mcs/class/corlib/System.IO/MonoIOError.cs +++ b/mcs/class/corlib/System.IO/MonoIOError.cs @@ -75,8 +75,8 @@ namespace System.IO ERROR_SHARING_BUFFER_EXCEEDED = 36, ERROR_HANDLE_EOF = 38, */ ERROR_HANDLE_DISK_FULL = 39, - /* ERROR_NOT_SUPPORTED = 50, - ERROR_REM_NOT_LIST = 51, + ERROR_NOT_SUPPORTED = 50, + /* ERROR_REM_NOT_LIST = 51, ERROR_DUP_NAME = 52, ERROR_BAD_NETPATH = 53, ERROR_NETWORK_BUSY = 54, diff --git a/mcs/class/referencesource/System/services/monitoring/system/diagnosticts/AsyncStreamReader.cs b/mcs/class/referencesource/System/services/monitoring/system/diagnosticts/AsyncStreamReader.cs index 7be029f7e81..8260061e8d8 100644 --- a/mcs/class/referencesource/System/services/monitoring/system/diagnosticts/AsyncStreamReader.cs +++ b/mcs/class/referencesource/System/services/monitoring/system/diagnosticts/AsyncStreamReader.cs @@ -19,8 +19,9 @@ namespace System.Diagnostics { using System.IO; using System.Text; using System.Runtime.InteropServices; - using System.Threading; - using System.Collections; + using System.Threading; + using System.Collections; + using Microsoft.Win32.SafeHandles; internal delegate void UserCallBack(String data); @@ -58,10 +59,10 @@ namespace System.Diagnostics { // Cache the last position scanned in sb when searching for lines. private int currentLinePos; - #if MONO - //users to coordinate between Dispose and BeginReadLine - private object syncObject = new Object (); + //users to coordinate between Dispose and ReadBuffer + private object syncObject = new Object (); + private IAsyncResult asyncReadResult = null; #endif internal AsyncStreamReader(Process process, Stream stream, UserCallBack callback, Encoding encoding) @@ -115,8 +116,31 @@ namespace System.Diagnostics { lock (syncObject) { #endif if (disposing) { - if (stream != null) + if (stream != null) { +#if MONO + if (asyncReadResult != null && !asyncReadResult.IsCompleted) { + // Closing underlying stream when having pending async read request in progress is + // not portable and racy by design. Try to cancel pending async read before closing stream. + // We are still holding lock that will prevent new async read requests to queue up + // before we have closed and invalidated the stream. + if (stream is FileStream) { + SafeHandle tmpStreamHandle = ((FileStream)stream).SafeFileHandle; + while (!asyncReadResult.IsCompleted) { + MonoIOError error; + if (!MonoIO.Cancel (tmpStreamHandle, out error) && error == MonoIOError.ERROR_NOT_SUPPORTED) { + // Platform don't support canceling pending IO requests on stream. If an async pending read + // is still in flight when closing stream, it could trigger a race condition. + break; + } + + // Wait for a short time for pending async read to cancel/complete/fail. + asyncReadResult.AsyncWaitHandle.WaitOne (200); + } + } + } +#endif stream.Close(); + } } if (stream != null) { stream = null; @@ -151,7 +175,11 @@ namespace System.Diagnostics { if( sb == null ) { sb = new StringBuilder(DefaultBufferSize); +#if MONO + asyncReadResult = stream.BeginRead (byteBuffer, 0 , byteBuffer.Length, new AsyncCallback (ReadBuffer), null); +#else stream.BeginRead(byteBuffer, 0 , byteBuffer.Length, new AsyncCallback(ReadBuffer), null); +#endif } else { FlushMessageQueue(); @@ -169,12 +197,17 @@ namespace System.Diagnostics { try { #if MONO - var stream = this.stream; - if (stream == null) - byteLen = 0; - else -#endif + lock (syncObject) { + Debug.Assert (ar.IsCompleted); + asyncReadResult = null; + if (this.stream == null) + byteLen = 0; + else + byteLen = stream.EndRead (ar); + } +#else byteLen = stream.EndRead(ar); +#endif } catch (IOException ) { // We should ideally consume errors from operations getting cancelled @@ -242,10 +275,10 @@ retry_dispose: byteLen = 0; goto retry_dispose; } -#endif - stream.BeginRead(byteBuffer, 0 , byteBuffer.Length, new AsyncCallback(ReadBuffer), null); -#if MONO + asyncReadResult = stream.BeginRead (byteBuffer, 0 , byteBuffer.Length, new AsyncCallback(ReadBuffer), null); } +#else + stream.BeginRead(byteBuffer, 0 , byteBuffer.Length, new AsyncCallback(ReadBuffer), null); #endif } } diff --git a/mono/metadata/icall-def.h b/mono/metadata/icall-def.h index 873a7a8839a..4433731ffa5 100644 --- a/mono/metadata/icall-def.h +++ b/mono/metadata/icall-def.h @@ -407,7 +407,8 @@ HANDLES(MMAPIMPL_5, "OpenFileInternal", mono_mmap_open_file, gpointer, 9, (const HANDLES(MMAPIMPL_6, "OpenHandleInternal", mono_mmap_open_handle, gpointer, 7, (gpointer, const_gunichar2_ptr, int, gint64_ref, int, int, int_ref)) HANDLES(MMAPIMPL_7, "Unmap", mono_mmap_unmap, MonoBoolean, 1, (gpointer)) -ICALL_TYPE(MONOIO, "System.IO.MonoIO", MONOIO_1) +ICALL_TYPE(MONOIO, "System.IO.MonoIO", MONOIO_39) +NOHANDLES(ICALL(MONOIO_39, "Cancel_internal", ves_icall_System_IO_MonoIO_Cancel)) NOHANDLES(ICALL(MONOIO_1, "Close(intptr,System.IO.MonoIOError&)", ves_icall_System_IO_MonoIO_Close)) #ifndef PLATFORM_RO_FS NOHANDLES(ICALL(MONOIO_2, "CopyFile(char*,char*,bool,System.IO.MonoIOError&)", ves_icall_System_IO_MonoIO_CopyFile)) diff --git a/mono/metadata/w32file-unix.c b/mono/metadata/w32file-unix.c index 63dc306d75c..33f66a5c99a 100644 --- a/mono/metadata/w32file-unix.c +++ b/mono/metadata/w32file-unix.c @@ -2073,6 +2073,13 @@ mono_w32file_create(const gunichar2 *name, guint32 fileaccess, guint32 sharemode } gboolean +mono_w32file_cancel (gpointer handle) +{ + mono_w32error_set_last (ERROR_NOT_SUPPORTED); + return FALSE; +} + +gboolean mono_w32file_close (gpointer handle) { if (!mono_fdhandle_close (GPOINTER_TO_INT (handle))) { diff --git a/mono/metadata/w32file-win32.c b/mono/metadata/w32file-win32.c index e952de61b7d..112f5467066 100644 --- a/mono/metadata/w32file-win32.c +++ b/mono/metadata/w32file-win32.c @@ -64,6 +64,12 @@ mono_w32file_create(const gunichar2 *name, guint32 fileaccess, guint32 sharemode } gboolean +mono_w32file_cancel (gpointer handle) +{ + return CancelIoEx (handle, NULL); +} + +gboolean mono_w32file_close (gpointer handle) { gboolean res; diff --git a/mono/metadata/w32file.c b/mono/metadata/w32file.c index 4086e3fa0b3..ea45fe74cc5 100644 --- a/mono/metadata/w32file.c +++ b/mono/metadata/w32file.c @@ -506,6 +506,19 @@ ves_icall_System_IO_MonoIO_Close (HANDLE handle, gint32 *error) return(ret); } +MonoBoolean +ves_icall_System_IO_MonoIO_Cancel (HANDLE handle, gint32 *error) +{ + gboolean ret; + *error=ERROR_SUCCESS; + + ret = mono_w32file_cancel (handle); + if (ret == FALSE) + *error = mono_w32error_get_last (); + + return ret; +} + gint32 ves_icall_System_IO_MonoIO_Read (HANDLE handle, MonoArrayHandle dest, gint32 dest_offset, gint32 count, diff --git a/mono/metadata/w32file.h b/mono/metadata/w32file.h index e85a6a9cd15..772992593e9 100644 --- a/mono/metadata/w32file.h +++ b/mono/metadata/w32file.h @@ -164,6 +164,11 @@ gpointer ves_icall_System_IO_MonoIO_Open (const gunichar2 *filename, gint32 mode, gint32 access_mode, gint32 share, gint32 options, gint32 *error); + +ICALL_EXPORT +MonoBoolean +ves_icall_System_IO_MonoIO_Cancel (gpointer handle, gint32 *error); + ICALL_EXPORT MonoBoolean ves_icall_System_IO_MonoIO_Close (gpointer handle, gint32 *error); @@ -370,6 +375,9 @@ gpointer mono_w32file_create(const gunichar2 *name, guint32 fileaccess, guint32 sharemode, guint32 createmode, guint32 attrs); gboolean +mono_w32file_cancel (gpointer handle); + +gboolean mono_w32file_close (gpointer handle); gboolean -- 2.11.4.GIT