diff options
Diffstat (limited to 'src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp')
-rw-r--r-- | src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp | 436 |
1 files changed, 14 insertions, 422 deletions
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 3eab85e13..5f049209e 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,40 +26,12 @@ #include "gc_implementation/g1/concurrentG1Refine.hpp" #include "gc_implementation/g1/concurrentG1RefineThread.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" -#include "gc_implementation/g1/g1CollectorPolicy.hpp" -#include "gc_implementation/g1/g1GCPhaseTimes.hpp" -#include "gc_implementation/g1/g1RemSet.hpp" -#include "gc_implementation/g1/heapRegionSeq.inline.hpp" -#include "memory/space.inline.hpp" -#include "runtime/atomic.hpp" -#include "runtime/java.hpp" -#include "utilities/copy.hpp" +#include "gc_implementation/g1/g1HotCardCache.hpp" -// Possible sizes for the card counts cache: odd primes that roughly double in size. -// (See jvmtiTagMap.cpp). - -#define MAX_SIZE ((size_t) -1) - -size_t ConcurrentG1Refine::_cc_cache_sizes[] = { - 16381, 32771, 76831, 150001, 307261, - 614563, 1228891, 2457733, 4915219, 9830479, - 19660831, 39321619, 78643219, 157286461, MAX_SIZE - }; - -ConcurrentG1Refine::ConcurrentG1Refine() : - _card_counts(NULL), _card_epochs(NULL), - _n_card_counts(0), _max_cards(0), _max_n_card_counts(0), - _cache_size_index(0), _expand_card_counts(false), - _hot_cache(NULL), - _def_use_cache(false), _use_cache(false), - // We initialize the epochs of the array to 0. By initializing - // _n_periods to 1 and not 0 we automatically invalidate all the - // entries on the array. Otherwise we might accidentally think that - // we claimed a card that was in fact never set (see CR7033292). - _n_periods(1), - _threads(NULL), _n_threads(0) +ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h) : + _threads(NULL), _n_threads(0), + _hot_card_cache(g1h) { - // Ergomonically select initial concurrent refinement parameters if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1)); @@ -75,13 +47,17 @@ ConcurrentG1Refine::ConcurrentG1Refine() : FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2); } set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone())); + _n_worker_threads = thread_num(); // We need one extra thread to do the young gen rset size sampling. _n_threads = _n_worker_threads + 1; + reset_threshold_step(); _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC); + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); + ConcurrentG1RefineThread *next = NULL; for (int i = _n_threads - 1; i >= 0; i--) { ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); @@ -100,74 +76,8 @@ void ConcurrentG1Refine::reset_threshold_step() { } } -int ConcurrentG1Refine::thread_num() { - return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1); -} - void ConcurrentG1Refine::init() { - if (G1ConcRSLogCacheSize > 0) { - _g1h = G1CollectedHeap::heap(); - - _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift; - _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100; - - size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; - guarantee(_max_cards < max_card_num, "card_num representation"); - - // We need _n_card_counts to be less than _max_n_card_counts here - // so that the expansion call (below) actually allocates the - // _counts and _epochs arrays. - assert(_n_card_counts == 0, "pre-condition"); - assert(_max_n_card_counts > 0, "pre-condition"); - - // Find the index into cache size array that is of a size that's - // large enough to hold desired_sz. - size_t desired_sz = _max_cards / InitialCacheFraction; - int desired_sz_index = 0; - while (_cc_cache_sizes[desired_sz_index] < desired_sz) { - desired_sz_index += 1; - assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); - } - assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); - - // If the desired_sz value is between two sizes then - // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index] - // we will start with the lower size in the optimistic expectation that - // we will not need to expand up. Note desired_sz_index could also be 0. - if (desired_sz_index > 0 && - _cc_cache_sizes[desired_sz_index] > desired_sz) { - desired_sz_index -= 1; - } - - if (!expand_card_count_cache(desired_sz_index)) { - // Allocation was unsuccessful - exit - vm_exit_during_initialization("Could not reserve enough space for card count cache"); - } - assert(_n_card_counts > 0, "post-condition"); - assert(_cache_size_index == desired_sz_index, "post-condition"); - - Copy::fill_to_bytes(&_card_counts[0], - _n_card_counts * sizeof(CardCountCacheEntry)); - Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); - - ModRefBarrierSet* bs = _g1h->mr_bs(); - guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); - _ct_bs = (CardTableModRefBS*)bs; - _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); - - _def_use_cache = true; - _use_cache = true; - _hot_cache_size = (1 << G1ConcRSLogCacheSize); - _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); - _n_hot = 0; - _hot_cache_idx = 0; - - // For refining the cards in the hot cache in parallel - int n_workers = (ParallelGCThreads > 0 ? - _g1h->workers()->total_workers() : 1); - _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); - _hot_cache_par_claimed_idx = 0; - } + _hot_card_cache.initialize(); } void ConcurrentG1Refine::stop() { @@ -188,17 +98,6 @@ void ConcurrentG1Refine::reinitialize_threads() { } ConcurrentG1Refine::~ConcurrentG1Refine() { - if (G1ConcRSLogCacheSize > 0) { - // Please see the comment in allocate_card_count_cache - // for why we call os::malloc() and os::free() directly. - assert(_card_counts != NULL, "Logic"); - os::free(_card_counts, mtGC); - assert(_card_epochs != NULL, "Logic"); - os::free(_card_epochs, mtGC); - - assert(_hot_cache != NULL, "Logic"); - FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC); - } if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { delete _threads[i]; @@ -215,317 +114,10 @@ void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { } } -bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { - HeapWord* start = _ct_bs->addr_for(card_ptr); - HeapRegion* r = _g1h->heap_region_containing(start); - if (r != NULL && r->is_young()) { - return true; - } - // This card is not associated with a heap region - // so can't be young. - return false; -} - -jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { - unsigned new_card_num = ptr_2_card_num(card_ptr); - unsigned bucket = hash(new_card_num); - assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); - - CardCountCacheEntry* count_ptr = &_card_counts[bucket]; - CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; - - // We have to construct a new entry if we haven't updated the counts - // during the current period, or if the count was updated for a - // different card number. - unsigned int new_epoch = (unsigned int) _n_periods; - julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); - - while (true) { - // Fetch the previous epoch value - julong prev_epoch_entry = epoch_ptr->_value; - julong cas_res; - - if (extract_epoch(prev_epoch_entry) != new_epoch) { - // This entry has not yet been updated during this period. - // Note: we update the epoch value atomically to ensure - // that there is only one winner that updates the cached - // card_ptr value even though all the refine threads share - // the same epoch value. - - cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, - (volatile jlong*)&epoch_ptr->_value, - (jlong) prev_epoch_entry); - - if (cas_res == prev_epoch_entry) { - // We have successfully won the race to update the - // epoch and card_num value. Make it look like the - // count and eviction count were previously cleared. - count_ptr->_count = 1; - count_ptr->_evict_count = 0; - *count = 0; - // We can defer the processing of card_ptr - *defer = true; - return card_ptr; - } - // We did not win the race to update the epoch field, so some other - // thread must have done it. The value that gets returned by CAS - // should be the new epoch value. - assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); - // We could 'continue' here or just re-read the previous epoch value - prev_epoch_entry = epoch_ptr->_value; - } - - // The epoch entry for card_ptr has been updated during this period. - unsigned old_card_num = extract_card_num(prev_epoch_entry); - - // The card count that will be returned to caller - *count = count_ptr->_count; - - // Are we updating the count for the same card? - if (new_card_num == old_card_num) { - // Same card - just update the count. We could have more than one - // thread racing to update count for the current card. It should be - // OK not to use a CAS as the only penalty should be some missed - // increments of the count which delays identifying the card as "hot". - - if (*count < max_jubyte) count_ptr->_count++; - // We can defer the processing of card_ptr - *defer = true; - return card_ptr; - } - - // Different card - evict old card info - if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; - if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { - // Trigger a resize the next time we clear - _expand_card_counts = true; - } - - cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, - (volatile jlong*)&epoch_ptr->_value, - (jlong) prev_epoch_entry); - - if (cas_res == prev_epoch_entry) { - // We successfully updated the card num value in the epoch entry - count_ptr->_count = 0; // initialize counter for new card num - jbyte* old_card_ptr = card_num_2_ptr(old_card_num); - - // Even though the region containg the card at old_card_num was not - // in the young list when old_card_num was recorded in the epoch - // cache it could have been added to the free list and subsequently - // added to the young list in the intervening time. See CR 6817995. - // We do not deal with this case here - it will be handled in - // HeapRegion::oops_on_card_seq_iterate_careful after it has been - // determined that the region containing the card has been allocated - // to, and it's safe to check the young type of the region. - - // We do not want to defer processing of card_ptr in this case - // (we need to refine old_card_ptr and card_ptr) - *defer = false; - return old_card_ptr; - } - // Someone else beat us - try again. - } -} - -jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { - int count; - jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); - assert(cached_ptr != NULL, "bad cached card ptr"); - - // We've just inserted a card pointer into the card count cache - // and got back the card that we just inserted or (evicted) the - // previous contents of that count slot. - - // The card we got back could be in a young region. When the - // returned card (if evicted) was originally inserted, we had - // determined that its containing region was not young. However - // it is possible for the region to be freed during a cleanup - // pause, then reallocated and tagged as young which will result - // in the returned card residing in a young region. - // - // We do not deal with this case here - the change from non-young - // to young could be observed at any time - it will be handled in - // HeapRegion::oops_on_card_seq_iterate_careful after it has been - // determined that the region containing the card has been allocated - // to. - - // The card pointer we obtained from card count cache is not hot - // so do not store it in the cache; return it for immediate - // refining. - if (count < G1ConcRSHotCardLimit) { - return cached_ptr; - } - - // Otherwise, the pointer we got from the _card_counts cache is hot. - jbyte* res = NULL; - MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); - if (_n_hot == _hot_cache_size) { - res = _hot_cache[_hot_cache_idx]; - _n_hot--; - } - // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. - _hot_cache[_hot_cache_idx] = cached_ptr; - _hot_cache_idx++; - if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; - _n_hot++; - - // The card obtained from the hot card cache could be in a young - // region. See above on how this can happen. - - return res; -} - -void ConcurrentG1Refine::clean_up_cache(int worker_i, - G1RemSet* g1rs, - DirtyCardQueue* into_cset_dcq) { - assert(!use_cache(), "cache should be disabled"); - int start_idx; - - while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once - int end_idx = start_idx + _hot_cache_par_chunk_size; - - if (start_idx == - Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { - // The current worker has successfully claimed the chunk [start_idx..end_idx) - end_idx = MIN2(end_idx, _n_hot); - for (int i = start_idx; i < end_idx; i++) { - jbyte* entry = _hot_cache[i]; - if (entry != NULL) { - if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) { - // 'entry' contains references that point into the current - // collection set. We need to record 'entry' in the DCQS - // that's used for that purpose. - // - // The only time we care about recording cards that contain - // references that point into the collection set is during - // RSet updating while within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread - assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); - assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id"); - into_cset_dcq->enqueue(entry); - } - } - } - } - } -} - -// The arrays used to hold the card counts and the epochs must have -// a 1:1 correspondence. Hence they are allocated and freed together -// Returns true if the allocations of both the counts and epochs -// were successful; false otherwise. -bool ConcurrentG1Refine::allocate_card_count_cache(size_t n, - CardCountCacheEntry** counts, - CardEpochCacheEntry** epochs) { - // We call the allocation/free routines directly for the counts - // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY - // macros call AllocateHeap and FreeHeap respectively. - // AllocateHeap will call vm_exit_out_of_memory in the event - // of an allocation failure and abort the JVM. With the - // _counts/epochs arrays we only need to abort the JVM if the - // initial allocation of these arrays fails. - // - // Additionally AllocateHeap/FreeHeap do some tracing of - // allocate/free calls so calling one without calling the - // other can cause inconsistencies in the tracing. So we - // call neither. - - assert(*counts == NULL, "out param"); - assert(*epochs == NULL, "out param"); - - size_t counts_size = n * sizeof(CardCountCacheEntry); - size_t epochs_size = n * sizeof(CardEpochCacheEntry); - - *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC); - if (*counts == NULL) { - // allocation was unsuccessful - return false; - } - - *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC); - if (*epochs == NULL) { - // allocation was unsuccessful - free counts array - assert(*counts != NULL, "must be"); - os::free(*counts, mtGC); - *counts = NULL; - return false; - } - - // We successfully allocated both counts and epochs - return true; -} - -// Returns true if the card counts/epochs cache was -// successfully expanded; false otherwise. -bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) { - // Can we expand the card count and epoch tables? - if (_n_card_counts < _max_n_card_counts) { - assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob"); - - size_t cache_size = _cc_cache_sizes[cache_size_idx]; - // Make sure we don't go bigger than we will ever need - cache_size = MIN2(cache_size, _max_n_card_counts); - - // Should we expand the card count and card epoch tables? - if (cache_size > _n_card_counts) { - // We have been asked to allocate new, larger, arrays for - // the card counts and the epochs. Attempt the allocation - // of both before we free the existing arrays in case - // the allocation is unsuccessful... - CardCountCacheEntry* counts = NULL; - CardEpochCacheEntry* epochs = NULL; - - if (allocate_card_count_cache(cache_size, &counts, &epochs)) { - // Allocation was successful. - // We can just free the old arrays; we're - // not interested in preserving the contents - if (_card_counts != NULL) os::free(_card_counts, mtGC); - if (_card_epochs != NULL) os::free(_card_epochs, mtGC); - - // Cache the size of the arrays and the index that got us there. - _n_card_counts = cache_size; - _cache_size_index = cache_size_idx; - - _card_counts = counts; - _card_epochs = epochs; - - // We successfully allocated/expanded the caches. - return true; - } - } - } - - // We did not successfully expand the caches. - return false; -} - -void ConcurrentG1Refine::clear_and_record_card_counts() { - if (G1ConcRSLogCacheSize == 0) { - return; - } - - double start = os::elapsedTime(); - - if (_expand_card_counts) { - int new_idx = _cache_size_index + 1; - - if (expand_card_count_cache(new_idx)) { - // Allocation was successful and _n_card_counts has - // been updated to the new size. We only need to clear - // the epochs so we don't read a bogus epoch value - // when inserting a card into the hot card cache. - Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); - } - _expand_card_counts = false; - } - - int this_epoch = (int) _n_periods; - assert((this_epoch+1) <= max_jint, "to many periods"); - // Update epoch - _n_periods++; - double cc_clear_time_ms = (os::elapsedTime() - start) * 1000; - _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms); +int ConcurrentG1Refine::thread_num() { + int n_threads = (G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads + : ParallelGCThreads; + return MAX2<int>(n_threads, 1); } void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { |