kafka TopicBasedRemoteLogMetadataManager source code
File path: /storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/TopicBasedRemoteLogMetadataManager.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.server.log.remote.metadata.storage;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicIdPartition;
import org.apache.kafka.common.config.TopicConfig;
import org.apache.kafka.common.errors.RetriableException;
import org.apache.kafka.common.errors.TopicExistsException;
import org.apache.kafka.common.internals.FatalExitError;
import org.apache.kafka.common.utils.KafkaThread;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.server.log.remote.storage.RemoteLogMetadata;
import org.apache.kafka.server.log.remote.storage.RemoteLogMetadataManager;
import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata;
import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadataUpdate;
import org.apache.kafka.server.log.remote.storage.RemoteLogSegmentState;
import org.apache.kafka.server.log.remote.storage.RemotePartitionDeleteMetadata;
import org.apache.kafka.server.log.remote.storage.RemoteStorageException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantReadWriteLock;
/**
 * This is a {@link RemoteLogMetadataManager} implementation that uses an internal topic, {@link TopicBasedRemoteLogMetadataManagerConfig#REMOTE_LOG_METADATA_TOPIC_NAME}, as its storage.
* This is used to publish and fetch {@link RemoteLogMetadata} for the registered user topic partitions with
* {@link #onPartitionLeadershipChanges(Set, Set)}. Each broker will have an instance of this class and it subscribes
* to metadata updates for the registered user topic partitions.
*/
public class TopicBasedRemoteLogMetadataManager implements RemoteLogMetadataManager {
private static final Logger log = LoggerFactory.getLogger(TopicBasedRemoteLogMetadataManager.class);
private volatile boolean configured = false;
    // Indicates whether the close of this instance has been initiated via the #close() method.
    // Using AtomicBoolean instead of volatile to avoid http://findbugs.sourceforge.net/bugDescriptions.html#SP_SPIN_ON_FIELD,
    // which can be reported when a field is read but not updated in a spin loop, as in the #initializeResources() method.
private final AtomicBoolean closing = new AtomicBoolean(false);
private final AtomicBoolean initialized = new AtomicBoolean(false);
private final Time time = Time.SYSTEM;
private final boolean startConsumerThread;
private Thread initializationThread;
private volatile ProducerManager producerManager;
private volatile ConsumerManager consumerManager;
    // This allows this instance to be closed gracefully via {@link #close()} while there are pending or new
    // requests in methods that use resources like the producer/consumer managers.
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
private RemotePartitionMetadataStore remotePartitionMetadataStore;
private volatile TopicBasedRemoteLogMetadataManagerConfig rlmmConfig;
private volatile RemoteLogMetadataTopicPartitioner rlmmTopicPartitioner;
private final Set<TopicIdPartition> pendingAssignPartitions = Collections.synchronizedSet(new HashSet<>());
private volatile boolean initializationFailed;
public TopicBasedRemoteLogMetadataManager() {
this(true);
}
// Visible for testing.
public TopicBasedRemoteLogMetadataManager(boolean startConsumerThread) {
this.startConsumerThread = startConsumerThread;
}
@Override
public CompletableFuture<Void> addRemoteLogSegmentMetadata(RemoteLogSegmentMetadata remoteLogSegmentMetadata)
throws RemoteStorageException {
Objects.requireNonNull(remoteLogSegmentMetadata, "remoteLogSegmentMetadata can not be null");
        // This allows requests to be gracefully rejected while this instance is being closed, since closing
        // shuts down the producer/consumer manager instances.
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
            // This method is only allowed to add a remote log segment with the initial state (RemoteLogSegmentState.COPY_SEGMENT_STARTED),
            // not to update existing remote log segment metadata.
if (remoteLogSegmentMetadata.state() != RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
throw new IllegalArgumentException(
"Given remoteLogSegmentMetadata should have state as " + RemoteLogSegmentState.COPY_SEGMENT_STARTED
+ " but it contains state as: " + remoteLogSegmentMetadata.state());
}
// Publish the message to the topic.
return storeRemoteLogMetadata(remoteLogSegmentMetadata.remoteLogSegmentId().topicIdPartition(),
remoteLogSegmentMetadata);
} finally {
lock.readLock().unlock();
}
}
@Override
public CompletableFuture<Void> updateRemoteLogSegmentMetadata(RemoteLogSegmentMetadataUpdate segmentMetadataUpdate)
throws RemoteStorageException {
Objects.requireNonNull(segmentMetadataUpdate, "segmentMetadataUpdate can not be null");
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
// Callers should use addRemoteLogSegmentMetadata to add RemoteLogSegmentMetadata with state as
// RemoteLogSegmentState.COPY_SEGMENT_STARTED.
if (segmentMetadataUpdate.state() == RemoteLogSegmentState.COPY_SEGMENT_STARTED) {
throw new IllegalArgumentException("Given remoteLogSegmentMetadata should not have the state as: "
+ RemoteLogSegmentState.COPY_SEGMENT_STARTED);
}
// Publish the message to the topic.
return storeRemoteLogMetadata(segmentMetadataUpdate.remoteLogSegmentId().topicIdPartition(), segmentMetadataUpdate);
} finally {
lock.readLock().unlock();
}
}
@Override
public CompletableFuture<Void> putRemotePartitionDeleteMetadata(RemotePartitionDeleteMetadata remotePartitionDeleteMetadata)
throws RemoteStorageException {
Objects.requireNonNull(remotePartitionDeleteMetadata, "remotePartitionDeleteMetadata can not be null");
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
return storeRemoteLogMetadata(remotePartitionDeleteMetadata.topicIdPartition(), remotePartitionDeleteMetadata);
} finally {
lock.readLock().unlock();
}
}
/**
 * Returns a {@link CompletableFuture} which completes only after the given {@code remoteLogMetadata} is published to
 * the remote log metadata topic and the internal consumer has caught up to the produced record's offset.
*
* @param topicIdPartition partition of the given remoteLogMetadata.
* @param remoteLogMetadata RemoteLogMetadata to be stored.
 * @return a {@link CompletableFuture} which completes once the metadata is published and consumed by this instance.
 * @throws RemoteStorageException if any storage errors occur.
*/
private CompletableFuture<Void> storeRemoteLogMetadata(TopicIdPartition topicIdPartition,
RemoteLogMetadata remoteLogMetadata)
throws RemoteStorageException {
log.debug("Storing metadata for partition: [{}] with context: [{}]", topicIdPartition, remoteLogMetadata);
try {
// Publish the message to the metadata topic.
CompletableFuture<RecordMetadata> produceFuture = producerManager.publishMessage(remoteLogMetadata);
// Create and return a `CompletableFuture` instance which completes when the consumer is caught up with the produced record's offset.
return produceFuture.thenApplyAsync(recordMetadata -> {
try {
consumerManager.waitTillConsumptionCatchesUp(recordMetadata);
} catch (TimeoutException e) {
throw new KafkaException(e);
}
return null;
});
} catch (KafkaException e) {
if (e instanceof RetriableException) {
throw e;
} else {
throw new RemoteStorageException(e);
}
}
}
@Override
public Optional<RemoteLogSegmentMetadata> remoteLogSegmentMetadata(TopicIdPartition topicIdPartition,
int epochForOffset,
long offset)
throws RemoteStorageException {
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
return remotePartitionMetadataStore.remoteLogSegmentMetadata(topicIdPartition, offset, epochForOffset);
} finally {
lock.readLock().unlock();
}
}
@Override
public Optional<Long> highestOffsetForEpoch(TopicIdPartition topicIdPartition,
int leaderEpoch)
throws RemoteStorageException {
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
return remotePartitionMetadataStore.highestLogOffset(topicIdPartition, leaderEpoch);
} finally {
lock.readLock().unlock();
}
}
@Override
public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition)
throws RemoteStorageException {
Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null");
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition);
} finally {
lock.readLock().unlock();
}
}
@Override
public Iterator<RemoteLogSegmentMetadata> listRemoteLogSegments(TopicIdPartition topicIdPartition, int leaderEpoch)
throws RemoteStorageException {
Objects.requireNonNull(topicIdPartition, "topicIdPartition can not be null");
lock.readLock().lock();
try {
ensureInitializedAndNotClosed();
return remotePartitionMetadataStore.listRemoteLogSegments(topicIdPartition, leaderEpoch);
} finally {
lock.readLock().unlock();
}
}
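    // Returns the metadata topic partition to which the given user topic partition is mapped
    // by the configured RemoteLogMetadataTopicPartitioner.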
public int metadataPartition(TopicIdPartition topicIdPartition) {
return rlmmTopicPartitioner.metadataPartition(topicIdPartition);
}
// Visible For Testing
public Optional<Long> receivedOffsetForPartition(int metadataPartition) {
return consumerManager.receivedOffsetForPartition(metadataPartition);
}
@Override
public void onPartitionLeadershipChanges(Set<TopicIdPartition> leaderPartitions,
Set<TopicIdPartition> followerPartitions) {
Objects.requireNonNull(leaderPartitions, "leaderPartitions can not be null");
Objects.requireNonNull(followerPartitions, "followerPartitions can not be null");
log.info("Received leadership notifications with leader partitions {} and follower partitions {}",
leaderPartitions, followerPartitions);
lock.readLock().lock();
try {
if (closing.get()) {
throw new IllegalStateException("This instance is in closing state");
}
HashSet<TopicIdPartition> allPartitions = new HashSet<>(leaderPartitions);
allPartitions.addAll(followerPartitions);
if (!initialized.get()) {
// If it is not yet initialized, then keep them as pending partitions and assign them
// when it is initialized successfully in initializeResources().
this.pendingAssignPartitions.addAll(allPartitions);
} else {
assignPartitions(allPartitions);
}
} finally {
lock.readLock().unlock();
}
}
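    // Ensures the locally materialized state for each partition is loaded (from on-disk snapshots, if any)
    // and subscribes the consumer to the corresponding metadata topic partitions.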
private void assignPartitions(Set<TopicIdPartition> allPartitions) {
for (TopicIdPartition partition : allPartitions) {
remotePartitionMetadataStore.maybeLoadPartition(partition);
}
consumerManager.addAssignmentsForPartitions(allPartitions);
}
@Override
public void onStopPartitions(Set<TopicIdPartition> partitions) {
lock.readLock().lock();
try {
if (closing.get()) {
throw new IllegalStateException("This instance is in closing state");
}
if (!initialized.get()) {
// If it is not yet initialized, then remove them from the pending partitions if any.
if (!pendingAssignPartitions.isEmpty()) {
pendingAssignPartitions.removeAll(partitions);
}
} else {
consumerManager.removeAssignmentsForPartitions(partitions);
}
} finally {
lock.readLock().unlock();
}
}
@Override
public void configure(Map<String, ?> configs) {
Objects.requireNonNull(configs, "configs can not be null.");
lock.writeLock().lock();
try {
if (configured) {
log.info("Skipping configure as it is already configured.");
return;
}
log.info("Started initializing with configs: {}", configs);
rlmmConfig = new TopicBasedRemoteLogMetadataManagerConfig(configs);
rlmmTopicPartitioner = new RemoteLogMetadataTopicPartitioner(rlmmConfig.metadataTopicPartitionsCount());
remotePartitionMetadataStore = new RemotePartitionMetadataStore(new File(rlmmConfig.logDir()).toPath());
configured = true;
log.info("Successfully initialized with rlmmConfig: {}", rlmmConfig);
            // Schedule the initialization of the producer/consumer managers on a separate thread. The required
            // resources may not be available yet; the thread retries at regular intervals until initialization
            // succeeds.
initializationThread = KafkaThread.nonDaemon("RLMMInitializationThread", () -> initializeResources());
initializationThread.start();
} finally {
lock.writeLock().unlock();
}
}
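    // Runs on the initialization thread: creates the remote log metadata topic (retrying until the
    // configured retry timeout), validates its partition count, then creates the producer/consumer
    // managers and assigns any partitions that arrived while initialization was pending.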
private void initializeResources() {
log.info("Initializing the resources.");
final NewTopic remoteLogMetadataTopicRequest = createRemoteLogMetadataTopicRequest();
boolean topicCreated = false;
long startTimeMs = time.milliseconds();
AdminClient adminClient = null;
try {
adminClient = AdminClient.create(rlmmConfig.producerProperties());
// Stop if it is already initialized or closing.
while (!(initialized.get() || closing.get())) {
                // If the retry window has elapsed, raise an error and exit.
                if (time.milliseconds() - startTimeMs > rlmmConfig.initializationRetryMaxTimeoutMs()) {
                    log.error("Timed out initializing the resources; retried for [{}] ms.",
                            rlmmConfig.initializationRetryMaxTimeoutMs());
initializationFailed = true;
return;
}
if (!topicCreated) {
topicCreated = createTopic(adminClient, remoteLogMetadataTopicRequest);
}
if (!topicCreated) {
// Sleep for INITIALIZATION_RETRY_INTERVAL_MS before trying to create the topic again.
log.info("Sleep for : {} ms before it is retried again.", rlmmConfig.initializationRetryIntervalMs());
Utils.sleep(rlmmConfig.initializationRetryIntervalMs());
continue;
} else {
// If topic is already created, validate the existing topic partitions.
try {
String topicName = remoteLogMetadataTopicRequest.name();
                        // If the existing topic's partition count differs from the configured count, mark initialization as failed and exit.
if (!isPartitionsCountSameAsConfigured(adminClient, topicName)) {
initializationFailed = true;
}
} catch (Exception e) {
log.info("Sleep for : {} ms before it is retried again.", rlmmConfig.initializationRetryIntervalMs());
Utils.sleep(rlmmConfig.initializationRetryIntervalMs());
continue;
}
}
// Create producer and consumer managers.
lock.writeLock().lock();
try {
producerManager = new ProducerManager(rlmmConfig, rlmmTopicPartitioner);
consumerManager = new ConsumerManager(rlmmConfig, remotePartitionMetadataStore, rlmmTopicPartitioner, time);
if (startConsumerThread) {
consumerManager.startConsumerThread();
} else {
log.info("RLMM Consumer task thread is not configured to be started.");
}
if (!pendingAssignPartitions.isEmpty()) {
assignPartitions(pendingAssignPartitions);
pendingAssignPartitions.clear();
}
initialized.set(true);
log.info("Initialized resources successfully.");
} catch (Exception e) {
log.error("Encountered error while initializing producer/consumer", e);
return;
} finally {
lock.writeLock().unlock();
}
}
} finally {
if (adminClient != null) {
try {
adminClient.close(Duration.ofSeconds(10));
} catch (Exception e) {
// Ignore the error.
log.debug("Error occurred while closing the admin client", e);
}
}
}
}
private boolean isPartitionsCountSameAsConfigured(AdminClient adminClient,
String topicName) throws InterruptedException, ExecutionException {
log.debug("Getting topic details to check for partition count and replication factor.");
TopicDescription topicDescription = adminClient.describeTopics(Collections.singleton(topicName))
.topicNameValues().get(topicName).get();
int expectedPartitions = rlmmConfig.metadataTopicPartitionsCount();
int topicPartitionsSize = topicDescription.partitions().size();
if (topicPartitionsSize != expectedPartitions) {
log.error("Existing topic partition count [{}] is not same as the expected partition count [{}]",
topicPartitionsSize, expectedPartitions);
return false;
}
return true;
}
private NewTopic createRemoteLogMetadataTopicRequest() {
Map<String, String> topicConfigs = new HashMap<>();
topicConfigs.put(TopicConfig.RETENTION_MS_CONFIG, Long.toString(rlmmConfig.metadataTopicRetentionMs()));
topicConfigs.put(TopicConfig.CLEANUP_POLICY_CONFIG, TopicConfig.CLEANUP_POLICY_DELETE);
return new NewTopic(rlmmConfig.remoteLogMetadataTopicName(),
rlmmConfig.metadataTopicPartitionsCount(),
rlmmConfig.metadataTopicReplicationFactor()).configs(topicConfigs);
}
/**
 * @param adminClient admin client used to create the topic.
 * @param topic       topic to be created.
 * @return true if the topic already exists or was created successfully.
*/
private boolean createTopic(AdminClient adminClient, NewTopic topic) {
boolean topicCreated = false;
try {
adminClient.createTopics(Collections.singleton(topic)).all().get();
topicCreated = true;
} catch (Exception e) {
if (e.getCause() instanceof TopicExistsException) {
log.info("Topic [{}] already exists", topic.name());
topicCreated = true;
} else {
log.error("Encountered error while creating remote log metadata topic.", e);
}
}
return topicCreated;
}
public boolean isInitialized() {
return initialized.get();
}
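    // Guards the public API: throws FatalExitError (which shuts down the broker) if initialization
    // failed, and IllegalStateException if this instance is closing or not yet initialized.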
private void ensureInitializedAndNotClosed() {
if (initializationFailed) {
            // If initialization failed, shut down the broker.
throw new FatalExitError();
}
if (closing.get() || !initialized.get()) {
throw new IllegalStateException("This instance is in invalid state, initialized: " + initialized +
" close: " + closing);
}
}
// Visible for testing.
public TopicBasedRemoteLogMetadataManagerConfig config() {
return rlmmConfig;
}
// Visible for testing.
public void startConsumerThread() {
if (consumerManager != null) {
consumerManager.startConsumerThread();
}
}
@Override
public void close() throws IOException {
// Close all the resources.
log.info("Closing the resources.");
if (closing.compareAndSet(false, true)) {
lock.writeLock().lock();
try {
if (initializationThread != null) {
try {
initializationThread.join();
} catch (InterruptedException e) {
log.error("Initialization thread was interrupted while waiting to join on close.", e);
}
}
Utils.closeQuietly(producerManager, "ProducerTask");
Utils.closeQuietly(consumerManager, "RLMMConsumerManager");
Utils.closeQuietly(remotePartitionMetadataStore, "RemotePartitionMetadataStore");
} finally {
lock.writeLock().unlock();
log.info("Closed the resources.");
}
}
}
}
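For orientation, below is a minimal, hypothetical usage sketch of the manager's lifecycle: configure(Map) kicks off asynchronous initialization, onPartitionLeadershipChanges(Set, Set) registers partitions of interest, and close() tears everything down. The config keys ("broker.id", "log.dir", and the "remote.log.metadata.common.client." prefix for client properties) are assumptions based on the TopicBasedRemoteLogMetadataManagerConfig class referenced above; verify them against that class. This sketch is not part of the Kafka source.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.TopicIdPartition;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.Uuid;
import org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManager;

public class TopicBasedRlmmUsageSketch {
    public static void main(String[] args) throws Exception {
        Map<String, Object> configs = new HashMap<>();
        configs.put("broker.id", 0);               // assumed key: identifies this broker
        configs.put("log.dir", "/tmp/rlmm-state"); // assumed key: where the metadata store keeps its files
        // Assumed key: client properties carry a common prefix in the config class.
        configs.put("remote.log.metadata.common.client.bootstrap.servers", "localhost:9092");

        try (TopicBasedRemoteLogMetadataManager rlmm = new TopicBasedRemoteLogMetadataManager()) {
            // Returns quickly; topic creation and producer/consumer setup happen
            // asynchronously on RLMMInitializationThread.
            rlmm.configure(configs);

            TopicIdPartition tidp = new TopicIdPartition(Uuid.randomUuid(),
                    new TopicPartition("user-topic", 0));
            // If initialization is still in flight, the partition is parked in
            // pendingAssignPartitions and assigned once resources are ready.
            rlmm.onPartitionLeadershipChanges(Collections.singleton(tidp), Collections.emptySet());
        } // close() flips `closing`, joins the init thread, and closes the managers.
    }
}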