hadoop FileDistributionVisitor 源码

  • 2022-10-20
  • 浏览 (197)

hadoop FileDistributionVisitor 代码

文件路径:/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;
import java.util.LinkedList;

import org.apache.hadoop.util.StringUtils;

/**
 * File size distribution visitor.
 * 
 * <h3>Description.</h3>
 * This is the tool for analyzing file sizes in the namespace image.
 * In order to run the tool one should define a range of integers
 * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
 * The range of integers is divided into segments of size <tt>step</tt>: 
 * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
 * and the visitor calculates how many files in the system fall into 
 * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. 
 * Note that files larger than <tt>maxSize</tt> always fall into 
 * the very last segment.
 * 
 * <h3>Input.</h3>
 * <ul>
 * <li><tt>filename</tt> specifies the location of the image file;</li>
 * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
 * sizes considered by the visitor;</li>
 * <li><tt>step</tt> the range is divided into segments of size step.</li>
 * </ul>
 *
 * <h3>Output.</h3>
 * The output file is formatted as a tab separated two column table:
 * Size and NumFiles, where Size is the upper bound of the segment
 * and NumFiles is the number of files from the image whose size falls in
 * this segment.
 */
class FileDistributionVisitor extends TextWriterImageVisitor {
  /** Stack of the enclosing elements that were entered but not yet left. */
  final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();

  private final static long MAX_SIZE_DEFAULT = 0x2000000000L;   // 1/8 TB = 2^37
  private final static int INTERVAL_DEFAULT = 0x200000;         // 2 MB = 2^21

  /**
   * File counts per segment. Slot {@code i} counts the files whose size,
   * divided by {@code step} and rounded up, equals {@code i}; the last slot
   * additionally collects every file larger than {@code maxSize}.
   */
  private int[] distribution;
  private long maxSize;
  private int step;

  // Running totals are longs so that they cannot silently wrap around.
  private long totalFiles;
  private long totalDirectories;
  private long totalBlocks;
  private long totalSpace;
  private long maxFileSize;

  /** Data collected for the inode that is currently being visited. */
  private FileContext current;

  private boolean inInode = false;
  private boolean formatOutput = false;

  /**
   * File or directory information.
   */
  private static class FileContext {
    String path;
    long fileSize;
    int numBlocks;
    int replication;
  }

  /**
   * Creates the visitor and allocates the distribution table.
   *
   * @param filename passed through to the {@link TextWriterImageVisitor}
   *          constructor
   * @param maxSize upper end of the analyzed size range; {@code 0} selects
   *          {@link #MAX_SIZE_DEFAULT}
   * @param step width of one segment; {@code 0} selects
   *          {@link #INTERVAL_DEFAULT}
   * @param formatOutput if {@code true}, segments are written as
   *          human-readable ranges instead of raw byte counts
   * @throws IOException if {@code maxSize / step} does not fit into an array
   *           index, or if the superclass constructor fails
   * @throws IllegalArgumentException if {@code maxSize} or {@code step} is
   *           negative
   */
  public FileDistributionVisitor(String filename, long maxSize, int step,
      boolean formatOutput) throws IOException {
    super(filename, false);
    // Negative values would otherwise surface later as a
    // NegativeArraySizeException or as a bogus bucket index.
    if (maxSize < 0 || step < 0) {
      throw new IllegalArgumentException(
          "maxSize and step must not be negative: maxSize = " + maxSize
          + ", step = " + step);
    }
    this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
    this.step = (step == 0 ? INTERVAL_DEFAULT : step);
    this.formatOutput = formatOutput;
    long numIntervals = this.maxSize / this.step;
    if(numIntervals >= Integer.MAX_VALUE)
      throw new IOException("Too many distribution intervals " + numIntervals);
    this.distribution = new int[1 + (int)(numIntervals)];
    this.totalFiles = 0;
    this.totalDirectories = 0;
    this.totalBlocks = 0;
    this.totalSpace = 0;
    this.maxFileSize = 0;
  }

  /** Nothing has to be prepared before the image is visited. */
  @Override
  void start() throws IOException {}

  /** Writes the collected distribution, then lets the superclass finish. */
  @Override
  void finish() throws IOException {
    output();
    super.finish();
  }

  /**
   * Reports the abnormal end on standard output and still writes whatever
   * distribution was collected so far before delegating to the superclass.
   */
  @Override
  void finishAbnormally() throws IOException {
    System.out.println("*** Image processing finished abnormally.  Ending ***");
    output();
    super.finishAbnormally();
  }

  /**
   * Writes one line per non-empty segment through {@code write} and prints
   * the totals to standard output.
   */
  private void output() throws IOException {
    // write the distribution into the output file
    write((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
    final int last = distribution.length - 1;
    for (int i = 0; i < distribution.length; i++) {
      if (distribution[i] > 0) {
        if (formatOutput) {
          long lower = (long) (i == 0 ? 0 : i - 1) * step;
          // Widen BEFORE multiplying: i * step evaluated as int overflows
          // for the default settings (up to 2^16 * 2^21 = 2^37).
          // The last segment is open-ended, so it is capped by the largest
          // file actually seen.
          long upper = (i == last) ? maxFileSize : (long) i * step;
          write((i == 0 ? "[" : "(")
              + StringUtils.byteDesc(lower)
              + ", "
              + StringUtils.byteDesc(upper)
              + "]\t"
              + distribution[i] + "\n");
        } else {
          write(((long) i * step) + "\t" + distribution[i] + "\n");
        }
      }
    }
    System.out.println("totalFiles = " + totalFiles);
    System.out.println("totalDirectories = " + totalDirectories);
    System.out.println("totalBlocks = " + totalBlocks);
    System.out.println("totalSpace = " + totalSpace);
    System.out.println("maxFileSize = " + maxFileSize);
  }

  /**
   * Pops the innermost enclosing element. When that element is an inode, the
   * data gathered in {@code current} is folded into the totals and into the
   * distribution table.
   */
  @Override
  void leaveEnclosingElement() throws IOException {
    ImageElement elem = elemS.pop();

    if(elem != ImageElement.INODE &&
       elem != ImageElement.INODE_UNDER_CONSTRUCTION)
      return;
    inInode = false;
    // A negative block count is counted as a directory, not as a file.
    if(current.numBlocks < 0) {
      totalDirectories ++;
      return;
    }
    totalFiles++;
    totalBlocks += current.numBlocks;
    totalSpace += current.fileSize * current.replication;
    if(maxFileSize < current.fileSize)
      maxFileSize = current.fileSize;
    int high;
    if(current.fileSize > maxSize)
      high = distribution.length-1;
    else
      high = (int)Math.ceil((double)current.fileSize / step);

    // Guard against rounding pushing the index past the last segment.
    if (high >= distribution.length) {
      high = distribution.length - 1;
    }
    distribution[high]++;
    // Progress report for the first file and then once per million files.
    if(totalFiles % 1000000 == 1)
      System.out.println("Files processed: " + totalFiles
          + "  Current: " + current.path);
  }

  /**
   * Records the path, the replication and the accumulated byte count of the
   * inode that is currently open; elements outside an inode are ignored.
   */
  @Override
  void visit(ImageElement element, String value) throws IOException {
    if(inInode) {
      switch(element) {
      case INODE_PATH:
        current.path = (value.equals("") ? "/" : value);
        break;
      case REPLICATION:
        current.replication = Integer.parseInt(value);
        break;
      case NUM_BYTES:
        // Sizes are summed because NUM_BYTES may be visited several times
        // for the same inode.
        current.fileSize += Long.parseLong(value);
        break;
      default:
        break;
      }
    }
  }

  /**
   * Pushes the element and, for an inode, starts a fresh {@code FileContext}.
   */
  @Override
  void visitEnclosingElement(ImageElement element) throws IOException {
    elemS.push(element);
    if(element == ImageElement.INODE ||
       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
      current = new FileContext();
      inInode = true;
    }
  }

  /**
   * Keyed variant: pushes the element, starts a fresh {@code FileContext}
   * for an inode, and takes the block count of the current inode from the
   * value of a {@code BLOCKS} element.
   */
  @Override
  void visitEnclosingElement(ImageElement element,
      ImageElement key, String value) throws IOException {
    elemS.push(element);
    if(element == ImageElement.INODE ||
       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
      // Same as the un-keyed overload: without a new context the sizes of
      // this inode would be added to the previous one (or hit a null).
      current = new FileContext();
      inInode = true;
    } else if(element == ImageElement.BLOCKS) {
      current.numBlocks = Integer.parseInt(value);
    }
  }
}

相关信息

hadoop 源码目录

相关文章

hadoop DelimitedImageVisitor 源码

hadoop DepthCounter 源码

hadoop FSImageHandler 源码

hadoop FSImageLoader 源码

hadoop FileDistributionCalculator 源码

hadoop IgnoreSnapshotException 源码

hadoop ImageLoader 源码

hadoop ImageLoaderCurrent 源码

hadoop ImageVisitor 源码

hadoop IndentedImageVisitor 源码

0  赞