From c6a03bc5497dda8aeffe36e56e8ce45c4ad09f73 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 26 Jun 2017 14:09:15 -0400 Subject: Introduce primary context (#25122) * Introduce primary context The target of a primary relocation is not aware of the state of the replication group. In particular, it is not tracking in-sync and initializing shards and their checkpoints. This means that after the target shard is started, its knowledge of the replication group could differ from that of the relocation source. In particular, this differing view can lead to it computing a global checkpoint that moves backwards after it becomes aware of the state of the entire replication group. This commit addresses this issue by transferring a primary context during relocation handoff. * Fix test * Add assertion messages * Javadocs * Barrier between marking a shard in sync and relocating * Fix misplaced call * Paranoia * Better latch countdown * Catch any exception * Fix comment * Fix wait for cluster state relocation test * Update knowledge via update local checkpoint API * toString * Visibility * Refactor permit * Push down * Imports * Docs * Fix compilation * Remove assertion * Fix compilation * Remove context wrapper * Move PrimaryContext to new package * Piping for cluster state version This commit adds piping for the cluster state version to the global checkpoint tracker. We do not use it yet. * Remove unused import * Implement versioning in tracker * Fix test * Unneeded public * Imports * Promote on our own * Add tests * Import * Newline * Update comment * Serialization * Assertion message * Update stale comment * Remove newline * Less verbose * Remove redundant assertion * Tracking -> in-sync * Assertions * Just say no Friends do not let friends block the cluster state update thread on network operations. 
* Extra newline * Add allocation ID to assertion * Rename method * Another rename * Introduce sealing * Sealing tests * One more assertion * Fix imports * Safer sealing * Remove check * Remove another sealed check --- .../java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java | 2 +- core/src/test/java/org/elasticsearch/recovery/RelocationIT.java | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'core/src/test/java/org/elasticsearch/recovery') diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index e1a7a07448..b0d25f43bd 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -53,7 +53,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllS import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; -@TestLogging("_root:DEBUG,org.elasticsearch.index.shard:TRACE") +@TestLogging("_root:DEBUG,org.elasticsearch.index.shard:TRACE,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.index.seqno:TRACE,org.elasticsearch.indices.recovery:TRACE") public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private final Logger logger = Loggers.getLogger(RecoveryWhileUnderLoadIT.class); diff --git a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java index fe83847bff..48f6fdeaed 100644 --- a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java +++ b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java @@ -514,14 +514,6 @@ public class RelocationIT extends ESIntegTestCase { // refresh is a replication action so this forces a global checkpoint sync which is needed as these 
are asserted on in tear down client().admin().indices().prepareRefresh("test").get(); - /* - * We have to execute a second refresh as in the face of relocations, the relocation target is not aware of the in-sync set and so - * the first refresh would bring back the local checkpoint for any shards added to the in-sync set that the relocation target was - * not tracking. - */ - // TODO: remove this after a primary context is transferred during relocation handoff - client().admin().indices().prepareRefresh("test").get(); - } class RecoveryCorruption extends MockTransportService.DelegateTransport { -- cgit v1.2.3