From c6a03bc5497dda8aeffe36e56e8ce45c4ad09f73 Mon Sep 17 00:00:00 2001
From: Jason Tedor <jason@tedor.me>
Date: Mon, 26 Jun 2017 14:09:15 -0400
Subject: Introduce primary context (#25122)

* Introduce primary context

The target of a primary relocation is not aware of the state of the
replication group. In particular, it is not tracking in-sync and
initializing shards and their checkpoints. This means that after the
target shard is started, its knowledge of the replication group could
differ from that of the relocation source. In particular, this differing
view can lead to it computing a global checkpoint that moves backwards
after it becomes aware of the state of the entire replication
group. This commit addresses this issue by transferring a primary
context during relocation handoff.

* Fix test

* Add assertion messages

* Javadocs

* Barrier between marking a shard in sync and relocating

* Fix misplaced call

* Paranoia

* Better latch countdown

* Catch any exception

* Fix comment

* Fix wait for cluster state relocation test

* Update knowledge via upate local checkpoint API

* toString

* Visibility

* Refactor permit

* Push down

* Imports

* Docs

* Fix compilation

* Remove assertion

* Fix compilation

* Remove context wrapper

* Move PrimaryContext to new package

* Piping for cluster state version

This commit adds piping for the cluster state version to the global
checkpoint tracker. We do not use it yet.

* Remove unused import

* Implement versioning in tracker

* Fix test

* Unneeded public

* Imports

* Promote on our own

* Add tests

* Import

* Newline

* Update comment

* Serialization

* Assertion message

* Update stale comment

* Remove newline

* Less verbose

* Remove redundant assertion

* Tracking -> in-sync

* Assertions

* Just say no

Friends do not let friends block the cluster state update thread on
network operations.

* Extra newline

* Add allocation ID to assertion

* Rename method

* Another rename

* Introduce sealing

* Sealing tests

* One more assertion

* Fix imports

* Safer sealing

* Remove check

* Remove another sealed check
---
 .../java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java | 2 +-
 core/src/test/java/org/elasticsearch/recovery/RelocationIT.java   | 8 --------
 2 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'core/src/test/java/org/elasticsearch/recovery')

diff --git a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java
index e1a7a07448..b0d25f43bd 100644
--- a/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java
+++ b/core/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java
@@ -53,7 +53,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllS
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout;
 
-@TestLogging("_root:DEBUG,org.elasticsearch.index.shard:TRACE")
+@TestLogging("_root:DEBUG,org.elasticsearch.index.shard:TRACE,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.index.seqno:TRACE,org.elasticsearch.indices.recovery:TRACE")
 public class RecoveryWhileUnderLoadIT extends ESIntegTestCase {
     private final Logger logger = Loggers.getLogger(RecoveryWhileUnderLoadIT.class);
 
diff --git a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java
index fe83847bff..48f6fdeaed 100644
--- a/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java
+++ b/core/src/test/java/org/elasticsearch/recovery/RelocationIT.java
@@ -514,14 +514,6 @@ public class RelocationIT extends ESIntegTestCase {
 
         // refresh is a replication action so this forces a global checkpoint sync which is needed as these are asserted on in tear down
         client().admin().indices().prepareRefresh("test").get();
-        /*
-         * We have to execute a second refresh as in the face of relocations, the relocation target is not aware of the in-sync set and so
-         * the first refresh would bring back the local checkpoint for any shards added to the in-sync set that the relocation target was
-         * not tracking.
-         */
-        // TODO: remove this after a primary context is transferred during relocation handoff
-        client().admin().indices().prepareRefresh("test").get();
-
     }
 
     class RecoveryCorruption extends MockTransportService.DelegateTransport {
-- 
cgit v1.2.3