Java Code Examples for org.apache.hadoop.fs.FileSystem#getDefaultBlockSize()
The following examples show how to use org.apache.hadoop.fs.FileSystem#getDefaultBlockSize(). Each example is taken from an open-source project; the source file and license are noted above it.
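The method reports a filesystem's default block size in bytes. It has two overloads: a no-argument form that returns the filesystem-wide default (deprecated in recent Hadoop releases) and a Path form that returns the default for the filesystem that would actually hold the given path. A minimal sketch of both calls, assuming the Configuration on the classpath points at your cluster; the path name here is purely illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();   // picks up core-site.xml / hdfs-site.xml
    FileSystem fs = FileSystem.get(conf);

    // Deprecated overload: filesystem-wide default block size.
    @SuppressWarnings("deprecation")
    long fsWideDefault = fs.getDefaultBlockSize();

    // Preferred overload: default block size for the filesystem resolved for this path.
    long pathDefault = fs.getDefaultBlockSize(new Path("/tmp/example.dat"));

    System.out.println("default block size: " + fsWideDefault + " / " + pathDefault);
  }
}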
Example 1
Source File: TestCombineFileInputFormat.java From hadoop with Apache License 2.0
private void splitRealFiles(String[] args) throws IOException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  long blockSize = fs.getDefaultBlockSize();

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    FileInputFormat.addInputPaths(job, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  List<InputSplit> splits = inFormat.getSplits(job);
  System.out.println("Total number of splits " + splits.size());
  for (int i = 0; i < splits.size(); ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
Example 2
Source File: FSOperations.java From hadoop with Apache License 2.0
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 *
 * @return The URI of the created file.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
  if (replication == -1) {
    replication = fs.getDefaultReplication(path);
  }
  if (blockSize == -1) {
    blockSize = fs.getDefaultBlockSize(path);
  }
  FsPermission fsPermission = new FsPermission(permission);
  int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096);
  OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
  IOUtils.copyBytes(is, os, bufferSize, true);
  os.close();
  return null;
}
Example 3
Source File: TestCombineFileInputFormat.java From big-c with Apache License 2.0
private void splitRealFiles(String[] args) throws IOException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  long blockSize = fs.getDefaultBlockSize();

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    FileInputFormat.addInputPaths(job, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  List<InputSplit> splits = inFormat.getSplits(job);
  System.out.println("Total number of splits " + splits.size());
  for (int i = 0; i < splits.size(); ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
Example 4
Source File: FSOperations.java From big-c with Apache License 2.0
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 *
 * @return The URI of the created file.
 *
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
  if (replication == -1) {
    replication = fs.getDefaultReplication(path);
  }
  if (blockSize == -1) {
    blockSize = fs.getDefaultBlockSize(path);
  }
  FsPermission fsPermission = new FsPermission(permission);
  int bufferSize = fs.getConf().getInt("httpfs.buffer.size", 4096);
  OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
  IOUtils.copyBytes(is, os, bufferSize, true);
  os.close();
  return null;
}
Example 5
Source File: SegmentHelper.java From indexr with Apache License 2.0
public static long getSegmentBlockSize(FileSystem fileSystem, long fileSize) {
  long blockSize = fileSystem.getDefaultBlockSize();
  while (blockSize < fileSize) {
    blockSize <<= 1;
  }
  return blockSize;
}
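This helper doubles the filesystem's default block size until it is at least fileSize, i.e. it picks the smallest power-of-two multiple of the default that lets the whole segment fit in a single block. For example, with a 128 MB default block size, a 300 MB segment would be written with a 512 MB block size.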
Example 6
Source File: RetriableFileCopyCommand.java From hadoop with Apache License 2.0
/**
 * @return the block size of the source file if we need to preserve either
 *         the block size or the checksum type. Otherwise the default block
 *         size of the target FS.
 */
private static long getBlockSize(
    EnumSet<FileAttribute> fileAttributes,
    FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
  boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
      || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
  return preserve ? sourceFile.getBlockSize() : targetFS
      .getDefaultBlockSize(tmpTargetPath);
}
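Note that DistCp falls back to the source block size not only when BLOCKSIZE is to be preserved but also when CHECKSUMTYPE is, presumably because the standard HDFS file checksum depends on the block layout, so a post-copy checksum comparison between source and target is only meaningful if both sides use the same block size.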
Example 7
Source File: TestCopyMapper.java From hadoop with Apache License 2.0
private static void touchFile(String path, boolean createMultipleBlocks,
    ChecksumOpt checksumOpt) throws Exception {
  FileSystem fs;
  DataOutputStream outputStream = null;
  try {
    fs = cluster.getFileSystem();
    final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
        fs.getWorkingDirectory());
    final long blockSize = createMultipleBlocks ? NON_DEFAULT_BLOCK_SIZE
        : fs.getDefaultBlockSize(qualifiedPath) * 2;
    FsPermission permission = FsPermission.getFileDefault().applyUMask(
        FsPermission.getUMask(fs.getConf()));
    outputStream = fs.create(qualifiedPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0,
        (short) (fs.getDefaultReplication(qualifiedPath) * 2), blockSize, null,
        checksumOpt);
    byte[] bytes = new byte[DEFAULT_FILE_SIZE];
    outputStream.write(bytes);
    long fileSize = DEFAULT_FILE_SIZE;
    if (createMultipleBlocks) {
      while (fileSize < 2 * blockSize) {
        outputStream.write(bytes);
        outputStream.flush();
        fileSize += DEFAULT_FILE_SIZE;
      }
    }
    pathList.add(qualifiedPath);
    ++nFiles;
    FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
    System.out.println(fileStatus.getBlockSize());
    System.out.println(fileStatus.getReplication());
  } finally {
    IOUtils.cleanup(null, outputStream);
  }
}
Example 8
Source File: RetriableFileCopyCommand.java From big-c with Apache License 2.0
/**
 * @return the block size of the source file if we need to preserve either
 *         the block size or the checksum type. Otherwise the default block
 *         size of the target FS.
 */
private static long getBlockSize(
    EnumSet<FileAttribute> fileAttributes,
    FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
  boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
      || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
  return preserve ? sourceFile.getBlockSize() : targetFS
      .getDefaultBlockSize(tmpTargetPath);
}
Example 9
Source File: TestS3ABlocksize.java From big-c with Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testBlockSize() throws Exception {
  FileSystem fs = getFileSystem();
  long defaultBlockSize = fs.getDefaultBlockSize();
  assertEquals("incorrect blocksize",
      S3AFileSystem.DEFAULT_BLOCKSIZE, defaultBlockSize);
  long newBlockSize = defaultBlockSize * 2;
  fs.getConf().setLong(Constants.FS_S3A_BLOCK_SIZE, newBlockSize);

  Path dir = path("testBlockSize");
  Path file = new Path(dir, "file");
  createFile(fs, file, true, dataset(1024, 'a', 'z' - 'a'));
  FileStatus fileStatus = fs.getFileStatus(file);
  assertEquals("Double default block size in stat(): " + fileStatus,
      newBlockSize, fileStatus.getBlockSize());

  // check the listing & assert that the block size is picked up by
  // this route too.
  boolean found = false;
  FileStatus[] listing = fs.listStatus(dir);
  for (FileStatus stat : listing) {
    LOG.info("entry: {}", stat);
    if (file.equals(stat.getPath())) {
      found = true;
      assertEquals("Double default block size in ls(): " + stat,
          newBlockSize, stat.getBlockSize());
    }
  }
  assertTrue("Did not find " + fileStatsToString(listing, ", "), found);
}
Example 10
Source File: WriterBinaryBlock.java From systemds with Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeBinaryBlockMatrixToSequenceFile( Path path, JobConf job, FileSystem fs,
    MatrixBlock src, int blen, int rl, int ru )
  throws IOException
{
  boolean sparse = src.isInSparseFormat();
  int rlen = src.getNumRows();
  int clen = src.getNumColumns();

  // 1) create sequence file writer, with right replication factor
  // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
  SequenceFile.Writer writer = null;
  if( _replication > 0 ) //if replication specified (otherwise default)
  {
    //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
    writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
        job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
        (short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
  }
  else {
    writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
  }

  try
  {
    // 2) bound check for src block
    if( src.getNumRows() > rlen || src.getNumColumns() > clen ) {
      throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
          "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
    }

    //3) reblock and write
    MatrixIndexes indexes = new MatrixIndexes();

    if( rlen <= blen && clen <= blen && rl == 0 ) //opt for single block
    {
      //directly write single block
      indexes.setIndexes(1, 1);
      writer.append(indexes, src);
    }
    else //general case
    {
      //initialize blocks for reuse (at most 4 different blocks required)
      MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());

      //create and write subblocks of matrix
      for(int blockRow = rl/blen; blockRow < (int)Math.ceil(ru/(double)blen); blockRow++)
        for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
        {
          int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
          int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;

          int row_offset = blockRow*blen;
          int col_offset = blockCol*blen;

          //get reuse matrix block
          MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);

          //copy submatrix to block
          src.slice( row_offset, row_offset+maxRow-1, col_offset, col_offset+maxCol-1, block );

          //append block to sequence file
          indexes.setIndexes(blockRow+1, blockCol+1);
          writer.append(indexes, block);

          //reset block for later reuse
          block.reset();
        }
    }
  }
  finally {
    IOUtilFunctions.closeSilently(writer);
  }
}
Example 11
Source File: WriterBinaryBlock.java From systemds with Apache License 2.0
@SuppressWarnings("deprecation")
protected final void writeDiagBinaryBlockMatrixToHDFS( Path path, JobConf job, FileSystem fs,
    MatrixBlock src, long rlen, long clen, int blen )
  throws IOException, DMLRuntimeException
{
  boolean sparse = src.isInSparseFormat();

  // 1) create sequence file writer, with right replication factor
  // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
  SequenceFile.Writer writer = null;
  if( _replication > 0 ) //if replication specified (otherwise default)
  {
    //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
    writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
        job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096),
        (short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
  }
  else {
    writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
  }

  try
  {
    // 2) bound check for src block
    if( src.getNumRows() > rlen || src.getNumColumns() > clen ) {
      throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
          "out of overall matrix range [1:"+rlen+",1:"+clen+"].");
    }

    //3) reblock and write
    MatrixIndexes indexes = new MatrixIndexes();

    if( rlen <= blen && clen <= blen ) //opt for single block
    {
      //directly write single block
      indexes.setIndexes(1, 1);
      writer.append(indexes, src);
    }
    else //general case
    {
      //initialize blocks for reuse (at most 4 different blocks required)
      MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros());
      MatrixBlock emptyBlock = new MatrixBlock();

      //create and write subblocks of matrix
      for(int blockRow = 0; blockRow < (int)Math.ceil(src.getNumRows()/(double)blen); blockRow++)
        for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++)
        {
          int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen;
          int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen;
          MatrixBlock block = null;

          if( blockRow==blockCol ) //block on diagonal
          {
            int row_offset = blockRow*blen;
            int col_offset = blockCol*blen;

            //get reuse matrix block
            block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen);

            //copy submatrix to block
            src.slice( row_offset, row_offset+maxRow-1, col_offset, col_offset+maxCol-1, block );
          }
          else //empty block (not on diagonal)
          {
            block = emptyBlock;
            block.reset(maxRow, maxCol);
          }

          //append block to sequence file
          indexes.setIndexes(blockRow+1, blockCol+1);
          writer.append(indexes, block);

          //reset block for later reuse
          if( blockRow!=blockCol )
            block.reset();
        }
    }
  }
  finally {
    IOUtilFunctions.closeSilently(writer);
  }
}
Example 12
Source File: HadoopShims.java From spork with Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
  return fs.getDefaultBlockSize(path);
}
Example 13
Source File: WALFile.java From streamx with Apache License 2.0
Writer(Configuration conf, Option... opts) throws IOException {
  BlockSizeOption blockSizeOption =
      Options.getOption(BlockSizeOption.class, opts);
  BufferSizeOption bufferSizeOption =
      Options.getOption(BufferSizeOption.class, opts);
  ReplicationOption replicationOption =
      Options.getOption(ReplicationOption.class, opts);

  FileOption fileOption = Options.getOption(FileOption.class, opts);
  AppendIfExistsOption appendIfExistsOption = Options.getOption(
      AppendIfExistsOption.class, opts);
  StreamOption streamOption = Options.getOption(StreamOption.class, opts);

  // check consistency of options
  if ((fileOption == null) == (streamOption == null)) {
    throw new IllegalArgumentException("file or stream must be specified");
  }
  if (fileOption == null && (blockSizeOption != null
                             || bufferSizeOption != null
                             || replicationOption != null)) {
    throw new IllegalArgumentException("file modifier options not "
                                       + "compatible with stream");
  }

  FSDataOutputStream out;
  boolean ownStream = fileOption != null;
  if (ownStream) {
    Path p = fileOption.getValue();
    FileSystem fs;
    fs = p.getFileSystem(conf);
    int bufferSize = bufferSizeOption == null ? getBufferSize(conf) :
                     bufferSizeOption.getValue();
    short replication = replicationOption == null ?
                        fs.getDefaultReplication(p) :
                        (short) replicationOption.getValue();
    long blockSize = blockSizeOption == null ? fs.getDefaultBlockSize(p) :
                     blockSizeOption.getValue();

    if (appendIfExistsOption != null && appendIfExistsOption.getValue()
        && fs.exists(p)) {
      // Read the file and verify header details
      try (WALFile.Reader reader = new WALFile.Reader(conf,
          WALFile.Reader.file(p), new Reader.OnlyHeaderOption())) {
        if (reader.getVersion() != VERSION[3]) {
          throw new VersionMismatchException(VERSION[3], reader.getVersion());
        }
        sync = reader.getSync();
      }
      out = fs.append(p, bufferSize);
      this.appendMode = true;
    } else {
      out = fs.create(p, true, bufferSize, replication, blockSize);
    }
  } else {
    out = streamOption.getValue();
  }

  init(conf, out, ownStream);
}
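When the Writer owns the output file (a FileOption was given), the buffer size, replication factor, and block size each fall back to the target filesystem's defaults for that path unless the corresponding Option was supplied; getDefaultBlockSize(p) provides the block-size fallback, mirroring how the replication fallback uses getDefaultReplication(p).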
Example 14
Source File: CopyableFile.java From incubator-gobblin with Apache License 2.0
/**
 * @return desired block size for destination file.
 */
public long getBlockSize(FileSystem targetFs) {
  return getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE)
      ? getOrigin().getBlockSize() : targetFs.getDefaultBlockSize(this.destination);
}
Example 15
Source File: HadoopShims.java From spork with Apache License 2.0
public static long getDefaultBlockSize(FileSystem fs, Path path) {
  return fs.getDefaultBlockSize();
}
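Unlike the variant in Example 12, this shim ignores the path argument and calls the deprecated no-argument overload, so it always returns the filesystem-wide default; this is presumably the shim for an older Hadoop line in which the getDefaultBlockSize(Path) overload is not available.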
Example 16
Source File: RCFile.java From incubator-tajo with Apache License 2.0
/**
 * Constructs a RCFile Writer.
 *
 * @param fs the file system used
 * @param conf the configuration file
 * @param name the file name
 * @param progress a progress meter to update as the file is written
 * @param metadata a string to string map in the file header
 * @throws java.io.IOException
 */
public Writer(FileSystem fs, Configuration conf, Path name, Progressable progress,
    Metadata metadata, CompressionCodec codec) throws IOException {
  this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096),
      fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress, metadata,
      codec);
}
Example 17
Source File: CommonFSUtils.java From hbase with Apache License 2.0
/**
 * Return the number of bytes that large input files should optimally
 * be split into to minimize i/o time.
 *
 * @param fs filesystem object
 * @param path path within the filesystem
 * @return the default block size for the path's filesystem
 */
public static long getDefaultBlockSize(final FileSystem fs, final Path path) {
  return fs.getDefaultBlockSize(path);
}