org.elasticsearch.cluster.ClusterStateObserver Java Examples

The following examples show how to use org.elasticsearch.cluster.ClusterStateObserver. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TransportMasterNodeAction.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a listener with the observer for the next cluster state change, passing
 * {@code statePredicate} along, and re-runs the action via {@code doStart} once a new state arrives.
 *
 * @param failure        the failure that triggered this retry; it is logged and wrapped in a
 *                       {@code MasterNotDiscoveredException} if the wait times out
 * @param statePredicate predicate handed to the observer together with the listener
 */
private void retry(final Throwable failure, final Predicate<ClusterState> statePredicate) {
    final ClusterStateObserver.Listener retryListener = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            // a new state is available: start over against it
            doStart(state);
        }

        @Override
        public void onClusterServiceClose() {
            listener.onFailure(new NodeClosedException(clusterService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            logger.debug(
                () -> new ParameterizedMessage(
                    "timed out while retrying [{}] after failure (timeout [{}])", actionName, timeout),
                failure
            );
            listener.onFailure(new MasterNotDiscoveredException(failure));
        }
    };
    observer.waitForNextChange(retryListener, statePredicate);
}
 
Example #2
Source File: TransportMasterNodeAction.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a listener with the observer for the next cluster state change, passing
 * {@code changePredicate} along, and re-runs the action via {@code doStart()} once a change arrives.
 *
 * @param failure         the failure that triggered this retry; it is logged and wrapped in a
 *                        {@code MasterNotDiscoveredException} if the wait times out
 * @param changePredicate predicate handed to the observer together with the listener
 */
private void retry(final Throwable failure, final ClusterStateObserver.ChangePredicate changePredicate) {
    final ClusterStateObserver.Listener retryListener = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            // the state itself is not passed on; doStart() takes no arguments here
            doStart();
        }

        @Override
        public void onClusterServiceClose() {
            listener.onFailure(new NodeClosedException(clusterService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            logger.debug(
                "timed out while retrying [{}] after failure (timeout [{}])",
                failure,
                actionName,
                timeout
            );
            listener.onFailure(new MasterNotDiscoveredException(failure));
        }
    };
    observer.waitForNextChange(retryListener, changePredicate);
}
 
Example #3
Source File: TransportReplicaShardIngestAction.java    From elasticsearch-helper with Apache License 2.0 6 votes vote down vote up
/**
 * Retries the operation on the next cluster state change. If the observer has already timed out,
 * the given failure is reported to the listener instead and nothing is scheduled.
 *
 * @param failure the failure that led to this retry, may be {@code null}
 */
private void retry(@Nullable final Throwable failure) {
    if (!observer.isTimedOut()) {
        request.operationThreaded(true);
        final ClusterStateObserver.Listener retryListener = new ClusterStateObserver.Listener() {
            @Override
            public void onNewClusterState(ClusterState state) {
                doStart();
            }

            @Override
            public void onClusterServiceClose() {
                listener.onFailure(new NodeClosedException(clusterService.localNode()));
            }

            @Override
            public void onTimeout(TimeValue timeout) {
                // one more attempt; the timeout is only raised when doStart() returns false
                if (!doStart()) {
                    raiseTimeoutFailure(timeout, failure);
                }
            }
        };
        observer.waitForNextChange(retryListener);
        return;
    }
    listener.onFailure(failure);
}
 
Example #4
Source File: TransportLeaderShardIngestAction.java    From elasticsearch-helper with Apache License 2.0 6 votes vote down vote up
/**
 * Schedules another attempt of the operation for the next cluster state change. When the observer
 * reports that it has timed out already, the failure is passed straight to the listener.
 *
 * @param failure the failure that led to this retry, may be {@code null}
 */
private void retry(@Nullable final Throwable failure) {
    final boolean timedOut = observer.isTimedOut();
    if (!timedOut) {
        request.operationThreaded(true);
        final ClusterStateObserver.Listener onChange = new ClusterStateObserver.Listener() {
            @Override
            public void onNewClusterState(ClusterState state) {
                doStart();
            }

            @Override
            public void onClusterServiceClose() {
                listener.onFailure(new NodeClosedException(clusterService.localNode()));
            }

            @Override
            public void onTimeout(TimeValue timeout) {
                // try once more before giving up; raise the timeout only if that attempt returns false
                final boolean started = doStart();
                if (!started) {
                    raiseTimeoutFailure(timeout, failure);
                }
            }
        };
        observer.waitForNextChange(onChange);
        return;
    }
    listener.onFailure(failure);
}
 
Example #5
Source File: ClusterApplierService.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the current stack trace does <b>NOT</b> involve a cluster state applier.
 * The stack is only inspected when the current thread's name contains
 * {@code CLUSTER_UPDATE_THREAD_NAME}.
 *
 * @param reason text embedded in the {@link AssertionError} message
 * @return always {@code true}, so the call can be used inside an {@code assert} statement
 * @throws AssertionError if a {@code ClusterApplierService.callClusterStateAppliers} frame is found
 *                        before any {@code ClusterStateObserver} frame
 */
private static boolean assertNotCalledFromClusterStateApplier(String reason) {
    final Thread current = Thread.currentThread();
    if (!current.getName().contains(CLUSTER_UPDATE_THREAD_NAME)) {
        return true;
    }
    final String observerClass = ClusterStateObserver.class.getName();
    final String applierServiceClass = ClusterApplierService.class.getName();
    for (StackTraceElement frame : current.getStackTrace()) {
        final String frameClass = frame.getClassName();
        if (frameClass.equals(observerClass)) {
            // people may start an observer from an applier
            return true;
        }
        if (frameClass.equals(applierServiceClass) && frame.getMethodName().equals("callClusterStateAppliers")) {
            throw new AssertionError("should not be called by a cluster state applier. reason [" + reason + "]");
        }
    }
    return true;
}
 
Example #6
Source File: TransportAddVotingConfigExclusionsActionTests.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Per-test fixture: builds a transport service on top of a {@code MockTransport}, registers the
 * {@code TransportAddVotingConfigExclusionsAction}, publishes an initial cluster state and creates
 * the {@code clusterStateObserver} used by the tests.
 */
@Before
public void setupForTest() {
    final MockTransport transport = new MockTransport();
    // the transport service always reports localNode as the local node, whatever address is bound
    transportService = transport.createTransportService(
        Settings.EMPTY,
        threadPool,
        TransportService.NOOP_TRANSPORT_INTERCEPTOR,
        boundTransportAddress -> localNode,
        null
    );

    // the instance is not kept; it is constructed only for its registration side effect
    new TransportAddVotingConfigExclusionsAction(
        transportService, clusterService, threadPool, new IndexNameExpressionResolver()); // registers action

    transportService.start();
    transportService.acceptIncomingRequests();

    // voting configuration made of localNode, otherNode1 and otherNode2 (otherDataNode is not part of it)
    final VotingConfiguration allNodesConfig = VotingConfiguration.of(localNode, otherNode1, otherNode2);

    // initial state: all four nodes are known, localNode is both the local node and the master,
    // and the same voting configuration is used as last-accepted and last-committed
    setState(clusterService, builder(new ClusterName("cluster"))
        .nodes(new Builder().add(localNode).add(otherNode1).add(otherNode2).add(otherDataNode)
            .localNodeId(localNode.getId()).masterNodeId(localNode.getId()))
        .metaData(MetaData.builder()
            .coordinationMetaData(CoordinationMetaData.builder().lastAcceptedConfiguration(allNodesConfig)
                .lastCommittedConfiguration(allNodesConfig).build())));

    // null is passed as the observer's timeout argument
    clusterStateObserver = new ClusterStateObserver(clusterService, null, logger);
}
 
Example #7
Source File: TransportReplicationAction.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Handles a failure of the replica operation. A {@code RetryOnReplicaException} causes the request
 * to be re-sent to the local node after the next cluster state change; for any other failure
 * {@code failReplicaIfNeeded} is attempted and the failure is then sent back as the response.
 *
 * @param t the failure raised by the replica operation
 */
@Override
public void onFailure(Throwable t) {
    if (!(t instanceof RetryOnReplicaException)) {
        try {
            failReplicaIfNeeded(t);
        } catch (Throwable unexpected) {
            logger.error("{} unexpected error while failing replica", unexpected, request.shardId().id());
        } finally {
            // the original failure is always reported, even if failing the replica itself went wrong
            responseWithFailure(t);
        }
        return;
    }

    logger.trace("Retrying operation on replica, action [{}], request [{}]", t, transportReplicaAction, request);
    final ClusterStateObserver.Listener resendOnNextState = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            // Forking a thread on local node via transport service so that custom transport service have an
            // opportunity to execute custom  logic before the replica operation begins
            TransportChannelResponseHandler<TransportResponse.Empty> handler =
                TransportChannelResponseHandler.emptyResponseHandler(
                    logger,
                    channel,
                    "action [" + transportReplicaAction  + "], request[" + request + "]"
                );
            transportService.sendRequest(clusterService.localNode(), transportReplicaAction, request, handler);
        }

        @Override
        public void onClusterServiceClose() {
            responseWithFailure(new NodeClosedException(clusterService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            throw new AssertionError("Cannot happen: there is not timeout");
        }
    };
    observer.waitForNextChange(resendOnNextState);
}
 
Example #8
Source File: ShardStateObserver.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Starts observing cluster state changes from {@code state} onwards and returns the future exposed
 * by the {@code RetryIsShardActive} listener registered for {@code shardId}. The listener is
 * registered with a predicate that delegates to {@code shardStartedOrIndexDeleted}.
 *
 * @param shardId the shard to wait for
 * @param state   the cluster state the observer is created with
 * @return the listener's result future
 */
private CompletableFuture<ShardRouting> waitForActiveShard(ShardId shardId, ClusterState state) {
    final ClusterStateObserver changeObserver = new ClusterStateObserver(
        state,
        clusterService,
        MAX_WAIT_TIME_FOR_NEW_STATE,
        LOGGER
    );
    final RetryIsShardActive shardActiveListener = new RetryIsShardActive(shardId);
    changeObserver.waitForNextChange(
        shardActiveListener,
        candidate -> shardStartedOrIndexDeleted(candidate, shardId)
    );
    return shardActiveListener.result();
}
 
Example #9
Source File: TransportReplicationAction.java    From crate with Apache License 2.0 5 votes vote down vote up
ReroutePhase(ReplicationTask task, Request request, ActionListener<Response> listener) {
    this.request = request;
    if (task != null) {
        this.request.setParentTask(clusterService.localNode().getId(), task.getId());
    }
    this.listener = listener;
    this.task = task;
    this.observer = new ClusterStateObserver(clusterService, request.timeout(), logger);
}
 
Example #10
Source File: TransportReplicationAction.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Handles a failure of the replica operation. Anything other than a {@code RetryOnReplicaException}
 * is sent back as the response immediately; a {@code RetryOnReplicaException} marks the request as
 * retried and re-sends it to the local node after the next cluster state change.
 *
 * @param e the failure raised by the replica operation
 */
@Override
public void onFailure(Exception e) {
    if (!(e instanceof RetryOnReplicaException)) {
        responseWithFailure(e);
        return;
    }

    logger.trace(
        () -> new ParameterizedMessage(
            "Retrying operation on replica, action [{}], request [{}]", transportReplicaAction, request),
        e
    );
    request.onRetry();

    final ClusterStateObserver.Listener resendOnNextState = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            // Forking a thread on local node via transport service so that custom transport service have an
            // opportunity to execute custom logic before the replica operation begins
            TransportChannelResponseHandler<TransportResponse.Empty> handler =
                new TransportChannelResponseHandler<>(
                    logger,
                    channel,
                    "action [" + transportReplicaAction + "], request[" + request + "]",
                    in -> TransportResponse.Empty.INSTANCE
                );
            transportService.sendRequest(
                clusterService.localNode(),
                transportReplicaAction,
                new ConcreteReplicaRequest<>(
                    request, targetAllocationID, primaryTerm, globalCheckpoint, maxSeqNoOfUpdatesOrDeletes),
                handler
            );
        }

        @Override
        public void onClusterServiceClose() {
            responseWithFailure(new NodeClosedException(clusterService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            throw new AssertionError("Cannot happen: there is not timeout");
        }
    };
    observer.waitForNextChange(resendOnNextState);
}
 
Example #11
Source File: NodeAndClusterIdStateListener.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a {@link NodeAndClusterIdStateListener} that waits for the first cluster state update
 * in which both nodeId and clusterId are present, so that these values can be set in
 * {@link NodeAndClusterIdConverter}.
 *
 * @param clusterService the cluster service whose current state is the observer's starting point
 */
public static void getAndSetNodeIdAndClusterId(ClusterService clusterService) {
    final ClusterStateObserver stateObserver =
        new ClusterStateObserver(clusterService.state(), clusterService, null, LOGGER);
    final NodeAndClusterIdStateListener idListener = new NodeAndClusterIdStateListener();
    stateObserver.waitForNextChange(idListener, NodeAndClusterIdStateListener::isNodeAndClusterIdPresent);
}
 
Example #12
Source File: ShardStateAction.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Waits on {@code observer} for the next cluster state change, passing {@code changePredicate}
 * along, and then sends the shard action again via {@code sendShardAction}.
 *
 * @param actionName      name of the transport action to re-send
 * @param observer        observer used to wait for the next change
 * @param request         the shard entry request to re-send
 * @param listener        notified with a {@code NodeClosedException} if the cluster service closes
 *                        while waiting; otherwise handed on to {@code sendShardAction}
 * @param changePredicate predicate handed to the observer together with the listener
 */
protected void waitForNewMasterAndRetry(String actionName,
                                        ClusterStateObserver observer,
                                        TransportRequest request,
                                        ActionListener<Void> listener,
                                        Predicate<ClusterState> changePredicate) {
    final ClusterStateObserver.Listener resendOnChange = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            if (LOGGER.isTraceEnabled()) {
                LOGGER.trace(
                    "new cluster state [{}] after waiting for master election for shard entry [{}]",
                    state,
                    request
                );
            }
            sendShardAction(actionName, state, request, listener);
        }

        @Override
        public void onClusterServiceClose() {
            LOGGER.warn("node closed while execution action [{}] for shard entry [{}]", actionName, request);
            final NodeClosedException nodeClosed = new NodeClosedException(clusterService.localNode());
            listener.onFailure(nodeClosed);
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            // we wait indefinitely for a new master
            assert false;
        }
    };
    observer.waitForNextChange(resendOnChange, changePredicate);
}
 
Example #13
Source File: ShardStateAction.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Sends {@code request} under {@code actionName} to the master node recorded in
 * {@code currentState}. If no master is known, or the send fails with what
 * {@code isMasterChannelException} classifies as a master channel exception, the call is retried
 * through {@code waitForNewMasterAndRetry}; any other failure is reported to {@code listener}.
 *
 * @param actionName   name of the transport action
 * @param currentState cluster state used to look up the master and to build the change predicate
 * @param request      the shard entry request to send
 * @param listener     receives {@code null} on success or the failure otherwise
 */
private void sendShardAction(final String actionName, final ClusterState currentState, final TransportRequest request, final ActionListener<Void> listener) {
    ClusterStateObserver observer = new ClusterStateObserver(currentState, clusterService, null, LOGGER);
    DiscoveryNode masterNode = currentState.nodes().getMasterNode();
    Predicate<ClusterState> changePredicate = MasterNodeChangePredicate.build(currentState);

    if (masterNode == null) {
        LOGGER.warn("no master known for action [{}] for shard entry [{}]", actionName, request);
        waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate);
        return;
    }

    LOGGER.debug("sending [{}] to [{}] for shard entry [{}]", actionName, masterNode.getId(), request);
    transportService.sendRequest(
        masterNode,
        actionName,
        request,
        new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
            @Override
            public void handleResponse(TransportResponse.Empty response) {
                listener.onResponse(null);
            }

            @Override
            public void handleException(TransportException exp) {
                if (isMasterChannelException(exp)) {
                    waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate);
                    return;
                }
                LOGGER.warn(
                    new ParameterizedMessage(
                        "unexpected failure while sending request [{}] to [{}] for shard entry [{}]",
                        actionName,
                        masterNode,
                        request
                    ),
                    exp
                );
                // for remote failures report the cause, wrapping it when it is not an Exception
                final Exception reported;
                if (exp instanceof RemoteTransportException) {
                    final Throwable cause = exp.getCause();
                    reported = cause instanceof Exception ? (Exception) cause : new ElasticsearchException(cause);
                } else {
                    reported = exp;
                }
                listener.onFailure(reported);
            }
        }
    );
}
 
Example #14
Source File: TransportReplicationAction.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the reroute phase of a replication request.
 *
 * @param task     the owning task, may be {@code null}; if given, it is recorded on the request as
 *                 parent task together with the local node id
 * @param request  the request being routed; its timeout is passed to the cluster state observer
 * @param listener the listener stored for this phase
 */
ReroutePhase(ReplicationTask task, Request request, ActionListener<Response> listener) {
    this.listener = listener;
    this.task = task;
    this.request = request;
    if (task != null) {
        request.setParentTask(
            clusterService.localNode().getId(),
            task.getId()
        );
    }
    this.observer = new ClusterStateObserver(
        clusterService,
        request.timeout(),
        logger
    );
}
 
Example #15
Source File: PeerRecoveryTargetService.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a translog-operations recovery request: looks up the ongoing recovery for the request's
 * recovery id and shard id, hands the operations to the recovery target and answers on the channel
 * with a {@code RecoveryTranslogOperationsResponse} built from the resulting checkpoint.
 * <p>
 * If indexing the operations fails with a {@code MapperException}, the handler waits for the next
 * cluster state change and then invokes itself again with the same arguments; any other failure is
 * passed to the channel listener.
 *
 * @param request the translog operations to replay
 * @param channel the channel the response or failure is sent on
 * @param task    the task passed through unchanged to the retried invocation
 * @throws IOException declared by the handler contract
 */
@Override
public void messageReceived(final RecoveryTranslogOperationsRequest request, final TransportChannel channel,
                            Task task) throws IOException {
    try (RecoveryRef recoveryRef =
             onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
        // observer is created with a null timeout argument (see the comment in the retry consumer below)
        final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, LOGGER);
        final RecoveryTarget recoveryTarget = recoveryRef.target();
        final ActionListener<RecoveryTranslogOperationsResponse> listener =
            new HandledTransportAction.ChannelActionListener<>(channel, Actions.TRANSLOG_OPS, request);
        // retry path: defer the whole request until the next cluster state change
        final Consumer<Exception> retryOnMappingException = exception -> {
            // in very rare cases a translog replay from primary is processed before
            // a mapping update on this node which causes local mapping changes since
            // the mapping (clusterstate) might not have arrived on this node.
            LOGGER.debug("delaying recovery due to missing mapping changes", exception);
            // we do not need to use a timeout here since the entire recovery mechanism has an
            // inactivity protection (it will be canceled)
            observer.waitForNextChange(new ClusterStateObserver.Listener() {
                @Override
                public void onNewClusterState(ClusterState state) {
                    try {
                        // re-enter this handler with the original arguments
                        messageReceived(request, channel, task);
                    } catch (Exception e) {
                        listener.onFailure(e);
                    }
                }

                @Override
                public void onClusterServiceClose() {
                    listener.onFailure(new ElasticsearchException(
                        "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                    // note that we do not use a timeout (see comment above)
                    listener.onFailure(new ElasticsearchTimeoutException(
                        "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
            });
        };
        recoveryTarget.indexTranslogOperations(
            request.operations(),
            request.totalTranslogOps(),
            request.maxSeenAutoIdTimestampOnPrimary(), request.maxSeqNoOfUpdatesOrDeletesOnPrimary(),
            ActionListener.wrap(
                // success: reply with the checkpoint reported by the recovery target
                checkpoint -> listener.onResponse(new RecoveryTranslogOperationsResponse(checkpoint)),
                e -> {
                    // only mapper failures are retried; everything else fails the request
                    if (e instanceof MapperException) {
                        retryOnMappingException.accept(e);
                    } else {
                        listener.onFailure(e);
                    }
                })
        );
    }
}
 
Example #16
Source File: TransportClearVotingConfigExclusionsAction.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Clears the voting config exclusions. When the request asks to wait for removal and some node
 * listed in the initial state's exclusions still exists, the clearing task is only submitted after
 * a cluster state arrives in which none of those nodes exists; otherwise it is submitted right away.
 *
 * @param request      the clear request (wait-for-removal flag and timeout are read from it)
 * @param initialState the cluster state whose voting config exclusions are checked
 * @param listener     receives the response, or a failure on timeout / cluster service close
 * @throws Exception declared by the overridden method
 */
@Override
protected void masterOperation(ClearVotingConfigExclusionsRequest request, ClusterState initialState,
                               ActionListener<ClearVotingConfigExclusionsResponse> listener) throws Exception {

    final long startTimeMillis = threadPool.relativeTimeInMillis();

    final Predicate<ClusterState> allExclusionsRemoved = candidateState -> {
        for (VotingConfigExclusion exclusion : initialState.getVotingConfigExclusions()) {
            // NB checking for the existence of any node with this persistent ID, because persistent IDs are how votes are counted.
            if (candidateState.nodes().nodeExists(exclusion.getNodeId())) {
                return false;
            }
        }
        return true;
    };

    final boolean mustWaitForRemoval =
        request.getWaitForRemoval() && allExclusionsRemoved.test(initialState) == false;
    if (mustWaitForRemoval == false) {
        submitClearVotingConfigExclusionsTask(request, startTimeMillis, listener);
        return;
    }

    final ClusterStateObserver clusterStateObserver =
        new ClusterStateObserver(initialState, clusterService, request.getTimeout(), logger);

    clusterStateObserver.waitForNextChange(new Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            submitClearVotingConfigExclusionsTask(request, startTimeMillis, listener);
        }

        @Override
        public void onClusterServiceClose() {
            listener.onFailure(
                new ElasticsearchException(
                    "cluster service closed while waiting for removal of nodes "
                        + initialState.getVotingConfigExclusions()
                )
            );
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            listener.onFailure(
                new ElasticsearchTimeoutException(
                    "timed out waiting for removal of nodes; if nodes should not be removed, set waitForRemoval to false. "
                        + initialState.getVotingConfigExclusions()
                )
            );
        }
    }, allExclusionsRemoved);
}
 
Example #17
Source File: TransportLeaderShardIngestAction.java    From elasticsearch-helper with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the cluster state observer for this operation, passing the request's timeout,
 * and then begins processing via {@code doStart()}.
 */
public void start() {
    this.observer = new ClusterStateObserver(
        clusterService,
        request.timeout(),
        logger
    );
    doStart();
}
 
Example #18
Source File: TransportReplicaShardIngestAction.java    From elasticsearch-helper with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the operation: sets up the cluster state observer with the request's timeout
 * and calls {@code doStart()}.
 */
public void start() {
    this.observer = new ClusterStateObserver(
        clusterService,
        request.timeout(),
        logger
    );
    doStart();
}
 
Example #19
Source File: TransportMasterNodeAction.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the current cluster state once, creates the observer from that same state (passing the
 * request's master node timeout) and starts the action against it.
 */
public void start() {
    final ClusterState initialState = clusterService.state();
    observer = new ClusterStateObserver(
        initialState,
        clusterService,
        request.masterNodeTimeout(),
        logger
    );
    doStart(initialState);
}
 
Example #20
Source File: TransportMasterNodeAction.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the cluster state observer, passing the request's master node timeout,
 * and then begins processing via {@code doStart()}.
 */
public void start() {
    observer = new ClusterStateObserver(
        clusterService,
        request.masterNodeTimeout(),
        logger
    );
    doStart();
}
 
Example #21
Source File: TransportInstanceSingleOperationAction.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the cluster state observer, passing the request's timeout,
 * and then begins processing via {@code doStart()}.
 */
public void start() {
    observer = new ClusterStateObserver(
        clusterService,
        request.timeout(),
        logger
    );
    doStart();
}