hadoop ContainerRelaunch 源码
haddop ContainerRelaunch 代码
文件路径:/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.ConfigurationException;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.security.AMSecretKeys;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* Relaunch container.
*/
public class ContainerRelaunch extends ContainerLaunch {
private static final Logger LOG =
LoggerFactory.getLogger(ContainerRelaunch.class);
public ContainerRelaunch(Context context, Configuration configuration,
Dispatcher dispatcher, ContainerExecutor exec, Application app,
Container container, LocalDirsHandlerService dirsHandler,
ContainerManagerImpl containerManager) {
super(context, configuration, dispatcher, exec, app, container, dirsHandler,
containerManager);
}
@Override
public Integer call() {
if (!validateContainerState()) {
return 0;
}
ContainerId containerId = container.getContainerId();
String containerIdStr = containerId.toString();
int ret = -1;
Path containerLogDir;
try {
Path containerWorkDir = getContainerWorkDir();
// Clean up container's previous files for container relaunch.
cleanupContainerFiles(containerWorkDir);
containerLogDir = getContainerLogDir();
Map<Path, List<String>> localResources = getLocalizedResources();
String appIdStr = app.getAppId().toString();
Path nmPrivateContainerScriptPath =
getNmPrivateContainerScriptPath(appIdStr, containerIdStr);
Path nmPrivateTokensPath =
getNmPrivateTokensPath(appIdStr, containerIdStr);
Path nmPrivateKeystorePath = (container.getCredentials().getSecretKey(
AMSecretKeys.YARN_APPLICATION_AM_KEYSTORE) == null) ? null :
getNmPrivateKeystorePath(appIdStr, containerIdStr);
Path nmPrivateTruststorePath = (container.getCredentials().getSecretKey(
AMSecretKeys.YARN_APPLICATION_AM_TRUSTSTORE) == null) ? null :
getNmPrivateTruststorePath(appIdStr, containerIdStr);
try {
// try to locate existing pid file.
pidFilePath = getPidFilePath(appIdStr, containerIdStr);
} catch (IOException e) {
// reset pid file path if it did not exist.
String pidFileSubpath = getPidFileSubpath(appIdStr, containerIdStr);
pidFilePath = dirsHandler.getLocalPathForWrite(pidFileSubpath);
}
LOG.info("Relaunch container with "
+ "workDir = " + containerWorkDir.toString()
+ ", logDir = " + containerLogDir.toString()
+ ", nmPrivateContainerScriptPath = "
+ nmPrivateContainerScriptPath.toString()
+ ", nmPrivateTokensPath = " + nmPrivateTokensPath.toString()
+ ", pidFilePath = " + pidFilePath.toString());
List<String> localDirs = dirsHandler.getLocalDirs();
List<String> logDirs = dirsHandler.getLogDirs();
List<String> containerLocalDirs = getContainerLocalDirs(localDirs);
List<String> containerLogDirs = getContainerLogDirs(logDirs);
List<String> filecacheDirs = getNMFilecacheDirs(localDirs);
List<String> userLocalDirs = getUserLocalDirs(localDirs);
List<String> userFilecacheDirs = getUserFilecacheDirs(localDirs);
List<String> applicationLocalDirs = getApplicationLocalDirs(localDirs,
appIdStr);
if (!dirsHandler.areDisksHealthy()) {
ret = ContainerExitStatus.DISKS_FAILED;
throw new IOException("Most of the disks failed. "
+ dirsHandler.getDisksHealthReport(false));
}
ret = relaunchContainer(new ContainerStartContext.Builder()
.setContainer(container)
.setLocalizedResources(localResources)
.setNmPrivateContainerScriptPath(nmPrivateContainerScriptPath)
.setNmPrivateTokensPath(nmPrivateTokensPath)
.setNmPrivateKeystorePath(nmPrivateKeystorePath)
.setNmPrivateTruststorePath(nmPrivateTruststorePath)
.setUser(container.getUser())
.setAppId(appIdStr)
.setContainerWorkDir(containerWorkDir)
.setLocalDirs(localDirs)
.setLogDirs(logDirs)
.setFilecacheDirs(filecacheDirs)
.setUserLocalDirs(userLocalDirs)
.setContainerLocalDirs(containerLocalDirs)
.setContainerLogDirs(containerLogDirs)
.setUserFilecacheDirs(userFilecacheDirs)
.setApplicationLocalDirs(applicationLocalDirs)
.build());
} catch (ConfigurationException e) {
LOG.error("Failed to launch container due to configuration error.", e);
dispatcher.getEventHandler().handle(new ContainerExitEvent(
containerId, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
e.getMessage()));
// Mark the node as unhealthy
getContext().getNodeStatusUpdater().reportException(e);
return ret;
} catch (Throwable e) {
LOG.warn("Failed to relaunch container.", e);
dispatcher.getEventHandler().handle(new ContainerExitEvent(
containerId, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
e.getMessage()));
return ret;
} finally {
setContainerCompletedStatus(ret);
}
handleContainerExitCode(ret, containerLogDir);
return ret;
}
private Path getContainerLogDir() throws IOException {
String containerLogDir = container.getLogDir();
if (containerLogDir == null || !dirsHandler.isGoodLogDir(containerLogDir)) {
throw new IOException("Could not find a good log dir " + containerLogDir
+ " for container " + container);
}
return new Path(containerLogDir);
}
private Path getNmPrivateContainerScriptPath(String appIdStr,
String containerIdStr) throws IOException {
return dirsHandler.getLocalPathForRead(
getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+ CONTAINER_SCRIPT);
}
private Path getNmPrivateTokensPath(String appIdStr,
String containerIdStr) throws IOException {
return dirsHandler.getLocalPathForRead(
getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+ String.format(ContainerExecutor.TOKEN_FILE_NAME_FMT,
containerIdStr));
}
private Path getNmPrivateKeystorePath(String appIdStr,
String containerIdStr) throws IOException {
return dirsHandler.getLocalPathForRead(
getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+ ContainerLaunch.KEYSTORE_FILE);
}
private Path getNmPrivateTruststorePath(String appIdStr,
String containerIdStr) throws IOException {
return dirsHandler.getLocalPathForRead(
getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
+ ContainerLaunch.TRUSTSTORE_FILE);
}
private Path getPidFilePath(String appIdStr,
String containerIdStr) throws IOException {
return dirsHandler.getLocalPathForRead(
getPidFileSubpath(appIdStr, containerIdStr));
}
}
相关信息
相关文章
hadoop AbstractContainersLauncher 源码
hadoop ContainersLauncherEvent 源码
hadoop ContainersLauncherEventType 源码
hadoop RecoverPausedContainerLaunch 源码
hadoop RecoveredContainerLaunch 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦