hadoop CrcUtil 源码

  • 2022-10-20
  • 浏览 (295)

haddop CrcUtil 代码

文件路径:/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

import java.io.IOException;
import java.util.Arrays;

/**
 * This class provides utilities for working with CRCs.
 */
@InterfaceAudience.LimitedPrivate({"Common", "HDFS", "MapReduce", "Yarn"})
@InterfaceStability.Unstable
public final class CrcUtil {
  public static final int MULTIPLICATIVE_IDENTITY = 0x80000000;
  public static final int GZIP_POLYNOMIAL = 0xEDB88320;
  public static final int CASTAGNOLI_POLYNOMIAL = 0x82F63B78;

  /**
   * Hide default constructor for a static utils class.
   */
  private CrcUtil() {
  }

  /**
   * Compute x^({@code lengthBytes} * 8) mod {@code mod}, where {@code mod} is
   * in "reversed" (little-endian) format such that {@code mod & 1} represents
   * x^31 and has an implicit term x^32.
   *
   * @param lengthBytes lengthBytes.
   * @param mod mod.
   * @return monomial.
   */
  public static int getMonomial(long lengthBytes, int mod) {
    if (lengthBytes == 0) {
      return MULTIPLICATIVE_IDENTITY;
    } else if (lengthBytes < 0) {
      throw new IllegalArgumentException(
          "lengthBytes must be positive, got " + lengthBytes);
    }

    // Decompose into
    // x^degree == x ^ SUM(bit[i] * 2^i) == PRODUCT(x ^ (bit[i] * 2^i))
    // Generate each x^(2^i) by squaring.
    // Since 'degree' is in 'bits', but we only need to support byte
    // granularity we can begin with x^8.
    int multiplier = MULTIPLICATIVE_IDENTITY >>> 8;
    int product = MULTIPLICATIVE_IDENTITY;
    long degree = lengthBytes;
    while (degree > 0) {
      if ((degree & 1) != 0) {
        product = (product == MULTIPLICATIVE_IDENTITY) ? multiplier :
            galoisFieldMultiply(product, multiplier, mod);
      }
      multiplier = galoisFieldMultiply(multiplier, multiplier, mod);
      degree >>= 1;
    }
    return product;
  }

  /**
   * composeWithMonomial.
   *
   * @param crcA crcA.
   * @param crcB crcB.
   * @param monomial Precomputed x^(lengthBInBytes * 8) mod {@code mod}
   * @param mod mod.
   * @return compose with monomial.
   */
  public static int composeWithMonomial(
      int crcA, int crcB, int monomial, int mod) {
    return galoisFieldMultiply(crcA, monomial, mod) ^ crcB;
  }

  /**
   * compose.
   *
   * @param crcA crcA.
   * @param crcB crcB.
   * @param lengthB length of content corresponding to {@code crcB}, in bytes.
   * @param mod mod.
   * @return compose result.
   */
  public static int compose(int crcA, int crcB, long lengthB, int mod) {
    int monomial = getMonomial(lengthB, mod);
    return composeWithMonomial(crcA, crcB, monomial, mod);
  }

  /**
   * @return 4-byte array holding the big-endian representation of
   *     {@code value}.
   *
   * @param value value.
   */
  public static byte[] intToBytes(int value) {
    byte[] buf = new byte[4];
    try {
      writeInt(buf, 0, value);
    } catch (IOException ioe) {
      // Since this should only be able to occur from code bugs within this
      // class rather than user input, we throw as a RuntimeException
      // rather than requiring this method to declare throwing IOException
      // for something the caller can't control.
      throw new RuntimeException(ioe);
    }
    return buf;
  }

  /**
   * Writes big-endian representation of {@code value} into {@code buf}
   * starting at {@code offset}. buf.length must be greater than or
   * equal to offset + 4.
   *
   * @param buf buf size.
   * @param offset offset.
   * @param value value.
   * @throws IOException raised on errors performing I/O.
   */
  public static void writeInt(byte[] buf, int offset, int value)
      throws IOException {
    if (offset + 4  > buf.length) {
      throw new IOException(String.format(
          "writeInt out of bounds: buf.length=%d, offset=%d",
          buf.length, offset));
    }
    buf[offset + 0] = (byte)((value >>> 24) & 0xff);
    buf[offset + 1] = (byte)((value >>> 16) & 0xff);
    buf[offset + 2] = (byte)((value >>> 8) & 0xff);
    buf[offset + 3] = (byte)(value & 0xff);
  }

  /**
   * Reads 4-byte big-endian int value from {@code buf} starting at
   * {@code offset}. buf.length must be greater than or equal to offset + 4.
   *
   * @param offset offset.
   * @param buf buf.
   * @return int.
   * @throws IOException raised on errors performing I/O.
   */
  public static int readInt(byte[] buf, int offset)
      throws IOException {
    if (offset + 4  > buf.length) {
      throw new IOException(String.format(
          "readInt out of bounds: buf.length=%d, offset=%d",
          buf.length, offset));
    }
    int value = ((buf[offset + 0] & 0xff) << 24) |
                ((buf[offset + 1] & 0xff) << 16) |
                ((buf[offset + 2] & 0xff) << 8)  |
                ((buf[offset + 3] & 0xff));
    return value;
  }

  /**
   * For use with debug statements; verifies bytes.length on creation,
   * expecting it to represent exactly one CRC, and returns a hex
   * formatted value.
   *
   * @param bytes bytes.
   * @throws IOException raised on errors performing I/O.
   * @return a list of hex formatted values.
   */
  public static String toSingleCrcString(final byte[] bytes)
      throws IOException {
    if (bytes.length != 4) {
      throw new IOException((String.format(
          "Unexpected byte[] length '%d' for single CRC. Contents: %s",
          bytes.length, Arrays.toString(bytes))));
    }
    return String.format("0x%08x", readInt(bytes, 0));
  }

  /**
   * For use with debug statements; verifies bytes.length on creation,
   * expecting it to be divisible by CRC byte size, and returns a list of
   * hex formatted values.
   *
   * @param bytes bytes.
   * @throws IOException raised on errors performing I/O.
   * @return a list of hex formatted values.
   */
  public static String toMultiCrcString(final byte[] bytes)
      throws IOException {
    if (bytes.length % 4 != 0) {
      throw new IOException((String.format(
          "Unexpected byte[] length '%d' not divisible by 4. Contents: %s",
          bytes.length, Arrays.toString(bytes))));
    }
    StringBuilder sb = new StringBuilder();
    sb.append('[');
    for (int i = 0; i < bytes.length; i += 4) {
      sb.append(String.format("0x%08x", readInt(bytes, i)));
      if (i != bytes.length - 4) {
        sb.append(", ");
      }
    }
    sb.append(']');
    return sb.toString();
  }

  /**
   * Galois field multiplication of {@code p} and {@code q} with the
   * generator polynomial {@code m} as the modulus.
   *
   * @param m The little-endian polynomial to use as the modulus when
   *     multiplying p and q, with implicit "1" bit beyond the bottom bit.
   */
  private static int galoisFieldMultiply(int p, int q, int m) {
    int summation = 0;

    // Top bit is the x^0 place; each right-shift increments the degree of the
    // current term.
    int curTerm = MULTIPLICATIVE_IDENTITY;

    // Iteratively multiply p by x mod m as we go to represent the q[i] term
    // (of degree x^i) times p.
    int px = p;

    while (curTerm != 0) {
      if ((q & curTerm) != 0) {
        summation ^= px;
      }

      // Bottom bit represents highest degree since we're little-endian; before
      // we multiply by "x" for the next term, check bottom bit to know whether
      // the resulting px will thus have a term matching the implicit "1" term
      // of "m" and thus will need to subtract "m" after mutiplying by "x".
      boolean hasMaxDegree = ((px & 1) != 0);
      px >>>= 1;
      if (hasMaxDegree) {
        px ^= m;
      }
      curTerm >>>= 1;
    }
    return summation;
  }
}

相关信息

hadoop 源码目录

相关文章

hadoop ApplicationClassLoader 源码

hadoop AsyncDiskService 源码

hadoop AutoCloseableLock 源码

hadoop BasicDiskValidator 源码

hadoop BlockingThreadPoolExecutorService 源码

hadoop CacheableIPList 源码

hadoop ChunkedArrayList 源码

hadoop ClassUtil 源码

hadoop Classpath 源码

hadoop CleanerUtil 源码

0  赞