From f754ab71a3005f5d6046907c51e8aecad4c95b76 Mon Sep 17 00:00:00 2001 From: Mark Probst Date: Fri, 6 Aug 2010 15:45:34 +0200 Subject: [PATCH] [sgen] Make parallel vs non-parallel mark&sweep selectable. Make the parallel Mark&Sweep collector selectable with the "major" option of the "MONO_GC_PARAMS" variable as "marksweep-par". The serial Mark&Sweep collector remains the default as "marksweep". --- man/mono.1 | 5 +-- mono/metadata/Makefile.am | 1 + mono/metadata/sgen-gc.c | 74 ++++++++++++++++++++------------------ mono/metadata/sgen-gc.h | 17 ++------- mono/metadata/sgen-major-copying.c | 1 + mono/metadata/sgen-marksweep-par.c | 3 ++ mono/metadata/sgen-marksweep.c | 12 ++++++- mono/metadata/sgen-workers.c | 62 ++++++++++++-------------------- 8 files changed, 83 insertions(+), 92 deletions(-) create mode 100644 mono/metadata/sgen-marksweep-par.c diff --git a/man/mono.1 b/man/mono.1 index 5144d386f39..b5409d7726d 100644 --- a/man/mono.1 +++ b/man/mono.1 @@ -1361,8 +1361,9 @@ specify kilo-, mega- and gigabytes, respectively. .TP \fBmajor=\fIcollector\fR Specifies which major collector to use. Options are `marksweep' for -the Mark&Sweep collector, and `copying' for the copying collector. -The Mark&Sweep collector is the default. +the Mark&Sweep collector, `marksweep-par' for parallel Mark&Sweep and +`copying' for the copying collector. The Mark&Sweep collector is the +default. 
.ne .RE .TP diff --git a/mono/metadata/Makefile.am b/mono/metadata/Makefile.am index a906e5c44d3..fa74360c6a1 100644 --- a/mono/metadata/Makefile.am +++ b/mono/metadata/Makefile.am @@ -168,6 +168,7 @@ libmonoruntime_la_SOURCES = \ sgen-gc.c \ sgen-internal.c \ sgen-marksweep.c \ + sgen-marksweep-par.c \ sgen-major-copying.c \ sgen-gc.h \ sgen-archdep.h \ diff --git a/mono/metadata/sgen-gc.c b/mono/metadata/sgen-gc.c index 6242c78cd43..b10c5d7a6ad 100644 --- a/mono/metadata/sgen-gc.c +++ b/mono/metadata/sgen-gc.c @@ -507,10 +507,10 @@ static mword lowest_heap_address = ~(mword)0; static mword highest_heap_address = 0; static LOCK_DECLARE (interruption_mutex); - -#ifdef SGEN_PARALLEL_MARK static LOCK_DECLARE (global_remset_mutex); -#endif + +#define LOCK_GLOBAL_REMSET pthread_mutex_lock (&global_remset_mutex) +#define UNLOCK_GLOBAL_REMSET pthread_mutex_unlock (&global_remset_mutex) typedef struct _FinalizeEntry FinalizeEntry; struct _FinalizeEntry { @@ -1523,20 +1523,23 @@ global_remset_location_was_not_added (gpointer ptr) * * The global remset contains locations which point into newspace after * a minor collection. This can happen if the objects they point to are pinned. + * + * LOCKING: During a major collection with a parallel collector this + * function takes the global remset lock itself. Serial collectors skip it. 
*/ void mono_sgen_add_to_global_remset (gpointer ptr) { RememberedSet *rs; + gboolean lock = current_collection_generation == GENERATION_OLD && major.is_parallel; g_assert (!ptr_in_nursery (ptr) && ptr_in_nursery (*(gpointer*)ptr)); - LOCK_GLOBAL_REMSET; + if (lock) + LOCK_GLOBAL_REMSET; - if (!global_remset_location_was_not_added (ptr)) { - UNLOCK_GLOBAL_REMSET; - return; - } + if (!global_remset_location_was_not_added (ptr)) + goto done; DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr)); binary_protocol_global_remset (ptr, *(gpointer*)ptr, (gpointer)LOAD_VTABLE (*(gpointer*)ptr)); @@ -1549,8 +1552,7 @@ mono_sgen_add_to_global_remset (gpointer ptr) */ if (global_remset->store_next + 3 < global_remset->end_set) { *(global_remset->store_next++) = (mword)ptr; - UNLOCK_GLOBAL_REMSET; - return; + goto done; } rs = alloc_remset (global_remset->end_set - global_remset->data, NULL); rs->next = global_remset; @@ -1566,7 +1568,9 @@ mono_sgen_add_to_global_remset (gpointer ptr) DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size)); } - UNLOCK_GLOBAL_REMSET; + done: + if (lock) + UNLOCK_GLOBAL_REMSET; } /* @@ -1590,10 +1594,8 @@ drain_gray_stack (GrayQueue *queue) major.minor_scan_object (obj, queue); } } else { -#ifdef SGEN_PARALLEL_MARK - if (queue == &workers_distribute_gray_queue) + if (major.is_parallel && queue == &workers_distribute_gray_queue) return; -#endif for (;;) { GRAY_OBJECT_DEQUEUE (queue, obj); @@ -2440,9 +2442,8 @@ mono_sgen_register_moved_object (void *obj, void *destination) g_assert (mono_profiler_events & MONO_PROFILE_GC_MOVES); /* FIXME: handle this for parallel collector */ -#ifdef SGEN_PARALLEL_MARK - g_assert_not_reached (); -#endif + g_assert (!major.is_parallel); + if (moved_objects_idx == MOVED_OBJECTS_NUM) { mono_profiler_gc_moves (moved_objects, moved_objects_idx); moved_objects_idx = 0; @@ -2702,7 +2703,8 @@ major_do_collection (const char *reason) binary_protocol_collection 
(GENERATION_OLD); check_scan_starts (); gray_object_queue_init (&gray_queue, mono_sgen_get_unmanaged_allocator ()); - gray_object_queue_init (&workers_distribute_gray_queue, mono_sgen_get_unmanaged_allocator ()); + if (major.is_parallel) + gray_object_queue_init (&workers_distribute_gray_queue, mono_sgen_get_unmanaged_allocator ()); degraded_mode = 0; DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs)); @@ -2751,7 +2753,7 @@ major_do_collection (const char *reason) DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n")); /* first pass for the sections */ mono_sgen_find_section_pin_queue_start_end (nursery_section); - major.find_pin_queue_start_ends (&workers_distribute_gray_queue); + major.find_pin_queue_start_ends (WORKERS_DISTRIBUTE_GRAY_QUEUE); /* identify possible pointers to the insize of large objects */ DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n")); for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) { @@ -2759,15 +2761,15 @@ major_do_collection (const char *reason) if (mono_sgen_find_optimized_pin_queue_area (bigobj->data, (char*)bigobj->data + bigobj->size, &dummy)) { pin_object (bigobj->data); /* FIXME: only enqueue if object has references */ - GRAY_OBJECT_ENQUEUE (&workers_distribute_gray_queue, bigobj->data); + GRAY_OBJECT_ENQUEUE (WORKERS_DISTRIBUTE_GRAY_QUEUE, bigobj->data); if (heap_dump_file) mono_sgen_pin_stats_register_object ((char*) bigobj->data, safe_object_get_size ((MonoObject*) bigobj->data)); DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %lu from roots\n", bigobj->data, safe_name (bigobj->data), (unsigned long)bigobj->size)); } } /* second pass for the sections */ - mono_sgen_pin_objects_in_section (nursery_section, &workers_distribute_gray_queue); - major.pin_objects (&workers_distribute_gray_queue); + mono_sgen_pin_objects_in_section (nursery_section, WORKERS_DISTRIBUTE_GRAY_QUEUE); + major.pin_objects (WORKERS_DISTRIBUTE_GRAY_QUEUE); TV_GETTIME 
(btv); time_major_pinning += TV_ELAPSED_MS (atv, btv); @@ -2782,8 +2784,8 @@ major_do_collection (const char *reason) time_major_scan_pinned += TV_ELAPSED_MS (btv, atv); /* registered roots, this includes static fields */ - scan_from_registered_roots (major.copy_or_mark_object, heap_start, heap_end, ROOT_TYPE_NORMAL, &workers_distribute_gray_queue); - scan_from_registered_roots (major.copy_or_mark_object, heap_start, heap_end, ROOT_TYPE_WBARRIER, &workers_distribute_gray_queue); + scan_from_registered_roots (major.copy_or_mark_object, heap_start, heap_end, ROOT_TYPE_NORMAL, WORKERS_DISTRIBUTE_GRAY_QUEUE); + scan_from_registered_roots (major.copy_or_mark_object, heap_start, heap_end, ROOT_TYPE_WBARRIER, WORKERS_DISTRIBUTE_GRAY_QUEUE); TV_GETTIME (btv); time_major_scan_registered_roots += TV_ELAPSED_MS (atv, btv); @@ -2798,8 +2800,8 @@ major_do_collection (const char *reason) time_major_scan_alloc_pinned += TV_ELAPSED_MS (atv, btv); /* scan the list of objects ready for finalization */ - scan_finalizer_entries (major.copy_or_mark_object, fin_ready_list, &workers_distribute_gray_queue); - scan_finalizer_entries (major.copy_or_mark_object, critical_fin_list, &workers_distribute_gray_queue); + scan_finalizer_entries (major.copy_or_mark_object, fin_ready_list, WORKERS_DISTRIBUTE_GRAY_QUEUE); + scan_finalizer_entries (major.copy_or_mark_object, critical_fin_list, WORKERS_DISTRIBUTE_GRAY_QUEUE); TV_GETTIME (atv); time_major_scan_finalized += TV_ELAPSED_MS (btv, atv); DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv))); @@ -2807,14 +2809,16 @@ major_do_collection (const char *reason) TV_GETTIME (btv); time_major_scan_big_objects += TV_ELAPSED_MS (atv, btv); - while (!gray_object_queue_is_empty (&workers_distribute_gray_queue)) - workers_distribute_gray_queue_sections (); + if (major.is_parallel) { + /* FIXME: don't do busy waiting here! 
*/ + while (!gray_object_queue_is_empty (WORKERS_DISTRIBUTE_GRAY_QUEUE)) + workers_distribute_gray_queue_sections (); + } workers_change_num_working (-1); workers_join (); -#ifdef SGEN_PARALLEL_MARK - g_assert (gray_object_queue_is_empty (&gray_queue)); -#endif + if (major.is_parallel) + g_assert (gray_object_queue_is_empty (&gray_queue)); /* all the objects in the heap */ finish_gray_stack (heap_start, heap_end, GENERATION_OLD, &gray_queue); @@ -6255,9 +6259,7 @@ mono_gc_base_init (void) gc_debug_file = stderr; LOCK_INIT (interruption_mutex); -#ifdef SGEN_PARALLEL_MARK LOCK_INIT (global_remset_mutex); -#endif if ((env = getenv ("MONO_GC_PARAMS"))) { opts = g_strsplit (env, ",", -1); @@ -6291,7 +6293,7 @@ mono_gc_base_init (void) } else { fprintf (stderr, "MONO_GC_PARAMS must be a comma-delimited list of one or more of the following:\n"); fprintf (stderr, " nursery-size=N (where N is an integer, possibly with a k, m or a g suffix)\n"); - fprintf (stderr, " major=COLLECTOR (where collector is `marksweep' or `copying')\n"); + fprintf (stderr, " major=COLLECTOR (where collector is `marksweep', `marksweep-par' or `copying')\n"); exit (1); } } @@ -6318,6 +6320,8 @@ mono_gc_base_init (void) if (!major_collector || !strcmp (major_collector, "marksweep")) { mono_sgen_marksweep_init (&major, DEFAULT_NURSERY_BITS, nursery_start, nursery_real_end); + } else if (!major_collector || !strcmp (major_collector, "marksweep-par")) { + mono_sgen_marksweep_par_init (&major, DEFAULT_NURSERY_BITS, nursery_start, nursery_real_end); workers_init (mono_cpu_count ()); } else if (!strcmp (major_collector, "copying")) { mono_sgen_copying_init (&major, DEFAULT_NURSERY_BITS, nursery_start, nursery_real_end); @@ -6326,6 +6330,8 @@ mono_gc_base_init (void) exit (1); } + workers_init (8); + if (major_collector) g_free (major_collector); diff --git a/mono/metadata/sgen-gc.h b/mono/metadata/sgen-gc.h index 880aeae5ce7..a774c969746 100644 --- a/mono/metadata/sgen-gc.h +++ b/mono/metadata/sgen-gc.h @@ 
-33,8 +33,6 @@ #include #include -#define SGEN_PARALLEL_MARK - /* * Turning on heavy statistics will turn off the managed allocator and * the managed write barrier. @@ -193,15 +191,6 @@ const static int restart_signal_num = SIGXCPU; #define LOCK_INTERRUPTION pthread_mutex_lock (&interruption_mutex) #define UNLOCK_INTERRUPTION pthread_mutex_unlock (&interruption_mutex) -#ifdef SGEN_PARALLEL_MARK -#define LOCK_GLOBAL_REMSET pthread_mutex_lock (&global_remset_mutex) -#define UNLOCK_GLOBAL_REMSET pthread_mutex_unlock (&global_remset_mutex) -#else -#define LOCK_GLOBAL_REMSET -#define UNLOCK_GLOBAL_REMSET -#endif - -#ifdef SGEN_PARALLEL_MARK #define SGEN_CAS_PTR InterlockedCompareExchangePointer #define SGEN_ATOMIC_ADD(x,i) do { \ int __old_x; \ @@ -209,10 +198,6 @@ const static int restart_signal_num = SIGXCPU; __old_x = (x); \ } while (InterlockedCompareExchange (&(x), __old_x + (i), __old_x) != __old_x); \ } while (0) -#else -#define SGEN_CAS_PTR(p,n,c) ((*(void**)(p) == (void*)(c)) ? (*(void**)(p) = (void*)(n), (void*)(c)) : (*(void**)(p))) -#define SGEN_ATOMIC_ADD(x,i) ((x) += (i)) -#endif /* non-pthread will need to provide their own version of start/stop */ #define USE_SIGNAL_BASED_START_STOP_WORLD 1 @@ -664,6 +649,7 @@ void mono_sgen_add_to_global_remset (gpointer ptr) MONO_INTERNAL; typedef struct _SgenMajorCollector SgenMajorCollector; struct _SgenMajorCollector { size_t section_size; + gboolean is_parallel; gboolean (*is_object_live) (char *obj); void* (*alloc_small_pinned_obj) (size_t size, gboolean has_references); @@ -694,6 +680,7 @@ struct _SgenMajorCollector { }; void mono_sgen_marksweep_init (SgenMajorCollector *collector, int nursery_bits, char *nursery_start, char *nursery_end) MONO_INTERNAL; +void mono_sgen_marksweep_par_init (SgenMajorCollector *collector, int nursery_bits, char *nursery_start, char *nursery_end) MONO_INTERNAL; void mono_sgen_copying_init (SgenMajorCollector *collector, int the_nursery_bits, char *the_nursery_start, char 
*the_nursery_end) MONO_INTERNAL; /* diff --git a/mono/metadata/sgen-major-copying.c b/mono/metadata/sgen-major-copying.c index d5409dbba8e..63632e8de65 100644 --- a/mono/metadata/sgen-major-copying.c +++ b/mono/metadata/sgen-major-copying.c @@ -630,6 +630,7 @@ mono_sgen_copying_init (SgenMajorCollector *collector, int the_nursery_bits, cha #endif collector->section_size = MAJOR_SECTION_SIZE; + collector->is_parallel = FALSE; collector->is_object_live = major_is_object_live; collector->alloc_small_pinned_obj = major_alloc_small_pinned_obj; diff --git a/mono/metadata/sgen-marksweep-par.c b/mono/metadata/sgen-marksweep-par.c new file mode 100644 index 00000000000..e728ee34dfa --- /dev/null +++ b/mono/metadata/sgen-marksweep-par.c @@ -0,0 +1,3 @@ +#define SGEN_PARALLEL_MARK + +#include "sgen-marksweep.c" diff --git a/mono/metadata/sgen-marksweep.c b/mono/metadata/sgen-marksweep.c index 27b3a27bd96..d4534f8122d 100644 --- a/mono/metadata/sgen-marksweep.c +++ b/mono/metadata/sgen-marksweep.c @@ -959,7 +959,12 @@ get_num_major_sections (void) } void -mono_sgen_marksweep_init (SgenMajorCollector *collector, int the_nursery_bits, char *the_nursery_start, char *the_nursery_end) +#ifdef SGEN_PARALLEL_MARK +mono_sgen_marksweep_par_init +#else +mono_sgen_marksweep_init +#endif + (SgenMajorCollector *collector, int the_nursery_bits, char *the_nursery_start, char *the_nursery_end) { int i; @@ -996,6 +1001,11 @@ mono_sgen_marksweep_init (SgenMajorCollector *collector, int the_nursery_bits, c mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed); collector->section_size = MAJOR_SECTION_SIZE; +#ifdef SGEN_PARALLEL_MARK + collector->is_parallel = TRUE; +#else + collector->is_parallel = FALSE; +#endif collector->is_object_live = major_is_object_live; collector->alloc_small_pinned_obj = major_alloc_small_pinned_obj; diff --git a/mono/metadata/sgen-workers.c b/mono/metadata/sgen-workers.c index bb9b3454c72..ffeda0a306b 100644 --- 
a/mono/metadata/sgen-workers.c +++ b/mono/metadata/sgen-workers.c @@ -22,8 +22,6 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef SGEN_PARALLEL_MARK - typedef struct _WorkerData WorkerData; struct _WorkerData { pthread_t thread; @@ -41,6 +39,9 @@ static WorkerData workers_gc_thread_data; static int workers_num_working; static GrayQueue workers_distribute_gray_queue; + +#define WORKERS_DISTRIBUTE_GRAY_QUEUE (major.is_parallel ? &workers_distribute_gray_queue : &gray_queue) + /* * Must be a power of 2. It seems that larger values don't help much. * The main reason to make this larger would be to sustain a bigger @@ -146,6 +147,10 @@ static int workers_change_num_working (int delta) { int old, new; + + if (!major.is_parallel) + return -1; + do { old = workers_num_working; new = old + delta; @@ -201,6 +206,9 @@ workers_thread_func (void *data_untyped) static void workers_distribute_gray_queue_sections (void) { + if (!major.is_parallel) + return; + workers_gray_queue_share_redirect (&workers_distribute_gray_queue); } @@ -209,6 +217,9 @@ workers_init (int num_workers) { int i; + if (!major.is_parallel) + return; + //g_print ("initing %d workers\n", num_workers); workers_num = num_workers; @@ -260,6 +271,9 @@ workers_start_all_workers (int num_additional_workers) { int i; + if (!major.is_parallel) + return; + g_assert (workers_num_working == 0); workers_num_working = workers_num + num_additional_workers; @@ -272,6 +286,9 @@ workers_join (void) { int i; + if (!major.is_parallel) + return; + //g_print ("joining\n"); for (i = 0; i < workers_num; ++i) { if (workers_data [i].is_working) @@ -293,47 +310,12 @@ mono_sgen_is_worker_thread (pthread_t thread) { int i; + if (!major.is_parallel) + return FALSE; + for (i = 0; i < workers_num; ++i) { if (workers_data [i].thread == thread) return TRUE; } return FALSE; } - -#else - -#define workers_distribute_gray_queue gray_queue - -static int -workers_change_num_working (int delta) -{ - return -1; -} - 
-static void -workers_distribute_gray_queue_sections (void) -{ -} - -static void -workers_init (int num_workers) -{ -} - -static void -workers_start_all_workers (int num_additional_workers) -{ -} - -static void -workers_join (void) -{ -} - -gboolean -mono_sgen_is_worker_thread (pthread_t thread) -{ - return FALSE; -} - -#endif -- 2.11.4.GIT