hadoop Platform 源码

  • 2022-10-20
  • 浏览 (192)

haddop Platform 代码

文件路径:/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/java/org/apache/hadoop/mapred/nativetask/Platform.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.nativetask;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.nativetask.serde.INativeSerializer;
import org.apache.hadoop.mapred.nativetask.serde.NativeSerialization;

/**
 * Base class for platforms. A platform is a framework running on top of
 * MapReduce, like Hadoop, Hive, Pig, Mahout. Each framework defines its
 * own key type and value type across a MapReduce job. For each platform,
 * we should implement serializers such that we could communicate data with
 * native side and native comparators so our native output collectors could
 * sort them and write out. We've already provided the {@link HadoopPlatform}
 * that supports all key types of Hadoop and users could implement their custom
 * platform.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class Platform {
  private final NativeSerialization serialization;
  protected Set<String> keyClassNames = new HashSet<String>();

  public Platform() {
    this.serialization = NativeSerialization.getInstance();
  }

  /**
   * initialize a platform, where we should call registerKey
   */
  public abstract void init() throws IOException;

  /**
   * @return name of a Platform, useful for logs and debug
   */
  public abstract String name();


  /**
   * associate a key class with its serializer and platform
   *
   * @param keyClassName map out key class name
   * @param key          key serializer class
   */
  protected void registerKey(String keyClassName, Class<?> key) throws IOException {
    serialization.register(keyClassName, key);
    keyClassNames.add(keyClassName);
  }

  /**
   * whether a platform supports a specific key should at least satisfy two conditions
   *
   * 1. the key belongs to the platform
   * 2. the associated serializer must implement {@link INativeComparable} interface
   *
   *
   * @param keyClassName map out put key class name
   * @param serializer   serializer associated with key via registerKey
   * @param job          job configuration
   * @return             true if the platform has implemented native comparators of the key and
   *                     false otherwise
   */
  protected abstract boolean support(String keyClassName,
      INativeSerializer<?> serializer, JobConf job);


  /**
   * whether it's the platform that has defined a custom Java comparator
   *
   * NativeTask doesn't support custom Java comparators
   * (set with mapreduce.job.output.key.comparator.class)
   * but a platform (e.g Pig) could also set that conf and implement native
   * comparators so we shouldn't bail out.
   *
   * @param keyComparator comparator set with mapreduce.job.output.key.comparator.class
   */
  protected abstract boolean define(Class<?> keyComparator);
}

相关信息

hadoop 源码目录

相关文章

hadoop Command 源码

hadoop CommandDispatcher 源码

hadoop Constants 源码

hadoop DataChannel 源码

hadoop DataReceiver 源码

hadoop HadoopPlatform 源码

hadoop ICombineHandler 源码

hadoop INativeComparable 源码

hadoop INativeHandler 源码

hadoop NativeBatchProcessor 源码

0  赞