Java Code Examples for org.apache.hadoop.fs.ContentSummary#getLength()
The following examples show how to use
org.apache.hadoop.fs.ContentSummary#getLength().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ColumnToRowJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
/**
 * Estimates the reducer count for the job from the total size of the input,
 * at DEFAULT_SIZE_PER_REDUCER bytes per reducer, clamped to [1, MAX_REDUCERS].
 * Falls back to a single reducer if the input size cannot be read.
 *
 * @param input input path whose total content size drives the estimate
 * @return the reducer count to configure, always >= 1
 */
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        // Resolve the filesystem from the input path itself: FileSystem.get()
        // returns the default filesystem, which is the wrong one when the
        // input lives on a different scheme than fs.defaultFS.
        FileSystem fs = input.getFileSystem(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();
        // Ceiling division, then clamp into [1, MAX_REDUCERS].
        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}",
                bytesPerReducer, MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
Example 2
Source File: ColumnToRowJob.java From kylin with Apache License 2.0 | 6 votes |
/**
 * Estimates the reducer count for the job from the total size of the input,
 * at DEFAULT_SIZE_PER_REDUCER bytes per reducer, clamped to [1, MAX_REDUCERS].
 * Falls back to a single reducer if the input size cannot be read.
 *
 * @param input input path whose total content size drives the estimate
 * @return the reducer count to configure, always >= 1
 */
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        // Resolve the filesystem from the input path itself: FileSystem.get()
        // returns the default filesystem, which is the wrong one when the
        // input lives on a different scheme than fs.defaultFS.
        FileSystem fs = input.getFileSystem(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();
        // Ceiling division, then clamp into [1, MAX_REDUCERS].
        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}",
                bytesPerReducer, MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
Example 3
Source File: DataValidationInputFormat.java From jumbune with GNU Lesser General Public License v3.0 | 6 votes |
/** * Finds files inside directories recusively and add to fileStatusList * @param job refers to JobContext that is being used to read the configurations of the job that ran * @param minSize refers to the minimum file block size. * @param maxSize refers to the maximum file block size. * @param splits refers to a list of splits that are being generated. * @param fileStatusList list of FileStatus * @throws IOException Signals that an I/O exception has occurred. */ public void setData(JobContext job, long minSize, long maxSize, List<InputSplit> splits, List<FileStatus> fileStatusList) throws IOException { for(FileStatus file:fileStatusList) { if (file.isDirectory()) { Path dirPath = file.getPath(); FileStatus [] fileArray = dirPath.getFileSystem(job.getConfiguration()).listStatus(dirPath); setData(job, minSize, maxSize, splits, Arrays.asList(fileArray)); } else { //Checking whether file is empty or not Path path = file.getPath(); FileSystem fs = path.getFileSystem(job.getConfiguration()); ContentSummary cs = fs.getContentSummary(path); if (cs.getLength() > 0) { generateSplits(job, minSize, maxSize, splits, file); } } } }
Example 4
Source File: LogicalPlanner.java From incubator-tajo with Apache License 2.0 | 6 votes |
/**
 * Best-effort refresh of the table's physical statistics: measures the total
 * byte volume under the table path and stores it in the table's stats.
 * Any failure is logged and swallowed so planning can continue with the
 * existing (possibly stale) stats.
 *
 * @param desc table descriptor to update in place; ignored when it has no path
 */
private void updatePhysicalInfo(TableDesc desc) {
    if (desc.getPath() != null) {
        try {
            FileSystem fs = desc.getPath().getFileSystem(new Configuration());
            FileStatus status = fs.getFileStatus(desc.getPath());
            if (desc.getStats() != null && (status.isDirectory() || status.isFile())) {
                ContentSummary summary = fs.getContentSummary(desc.getPath());
                if (summary != null) {
                    long volume = summary.getLength();
                    desc.getStats().setNumBytes(volume);
                }
            }
        } catch (Throwable t) {
            // Deliberately broad catch: stats refresh is best-effort. Log the
            // throwable as the cause (not just as the message object) so the
            // stack trace is preserved in the output.
            LOG.warn(t.getMessage(), t);
        }
    }
}
Example 5
Source File: MRHiveDictUtil.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Returns the total length in bytes of all content under the given HDFS URL.
 *
 * @param hdfsUrl path (file or directory) to measure
 * @return total size in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or the path queried
 */
private static long getFileSize(String hdfsUrl) throws IOException {
    Path location = new Path(hdfsUrl);
    FileSystem fileSystem = location.getFileSystem(new Configuration());
    return fileSystem.getContentSummary(location).getLength();
}
Example 6
Source File: CreateFlatHiveTableStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Returns the total length in bytes of all content under the given HDFS URL.
 *
 * @param hdfsUrl path (file or directory) to measure
 * @return total size in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or the path queried
 */
private long getFileSize(String hdfsUrl) throws IOException {
    Path target = new Path(hdfsUrl);
    FileSystem fileSystem = target.getFileSystem(new Configuration());
    ContentSummary summary = fileSystem.getContentSummary(target);
    return summary.getLength();
}
Example 7
Source File: StorageCleanupJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Collects all unused HDFS paths, tallies their total byte size, and — when
 * the {@code delete} flag is set — removes each one recursively. Results are
 * published via {@code hdfsGarbageFileBytes} and {@code hdfsGarbageFiles}.
 * A failure on one path is logged and does not abort the sweep.
 *
 * @throws IOException propagated from collecting the unused-file list
 */
private void cleanUnusedHdfsFiles() throws IOException {
    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);
    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }
    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();
    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            // Build the Path once and reuse it for both the summary and delete.
            Path hdfsPath = new Path(path);
            ContentSummary sum = fs.getContentSummary(hdfsPath);
            if (sum != null)
                garbageBytes += sum.getLength();
            if (delete) {
                // Parameterized logging, consistent with the rest of this method.
                logger.info("Deleting HDFS path {}", path);
                fs.delete(hdfsPath, true);
            } else {
                logger.info("Dry run, pending delete HDFS path {}", path);
            }
        } catch (IOException e) {
            logger.error("Error dealing unused HDFS path {}", path, e);
        }
    }
    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
Example 8
Source File: MRHiveDictUtil.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Returns the total length in bytes of all content under the given HDFS URL.
 *
 * @param hdfsUrl path (file or directory) to measure
 * @return total size in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or the path queried
 */
private static long getFileSize(String hdfsUrl) throws IOException {
    Path location = new Path(hdfsUrl);
    FileSystem fileSystem = location.getFileSystem(new Configuration());
    return fileSystem.getContentSummary(location).getLength();
}
Example 9
Source File: CreateFlatHiveTableStep.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Returns the total length in bytes of all content under the given HDFS URL.
 *
 * @param hdfsUrl path (file or directory) to measure
 * @return total size in bytes as reported by the filesystem's content summary
 * @throws IOException if the filesystem cannot be reached or the path queried
 */
private long getFileSize(String hdfsUrl) throws IOException {
    Path target = new Path(hdfsUrl);
    FileSystem fileSystem = target.getFileSystem(new Configuration());
    ContentSummary summary = fileSystem.getContentSummary(target);
    return summary.getLength();
}
Example 10
Source File: StorageCleanupJob.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Collects all unused HDFS paths, tallies their total byte size, and — when
 * the {@code delete} flag is set — removes each one recursively. Results are
 * published via {@code hdfsGarbageFileBytes} and {@code hdfsGarbageFiles}.
 * A failure on one path is logged and does not abort the sweep.
 *
 * @throws IOException propagated from collecting the unused-file list
 */
private void cleanUnusedHdfsFiles() throws IOException {
    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);
    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }
    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();
    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            // Build the Path once and reuse it for both the summary and delete.
            Path hdfsPath = new Path(path);
            ContentSummary sum = fs.getContentSummary(hdfsPath);
            if (sum != null)
                garbageBytes += sum.getLength();
            if (delete) {
                // Parameterized logging, consistent with the rest of this method.
                logger.info("Deleting HDFS path {}", path);
                fs.delete(hdfsPath, true);
            } else {
                logger.info("Dry run, pending delete HDFS path {}", path);
            }
        } catch (IOException e) {
            logger.error("Error dealing unused HDFS path {}", path, e);
        }
    }
    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
Example 11
Source File: BlurIndexSimpleWriter.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
/**
 * Reports the total on-disk size in bytes of this shard's HDFS directory.
 *
 * @return total length of all content under the shard's HDFS path
 * @throws IOException if the filesystem cannot be queried
 */
@Override
public long getOnDiskSize() throws IOException {
    Path shardPath = _shardContext.getHdfsDirPath();
    Configuration conf = _tableContext.getConfiguration();
    FileSystem fileSystem = shardPath.getFileSystem(conf);
    return fileSystem.getContentSummary(shardPath).getLength();
}
Example 12
Source File: Query.java From tajo with Apache License 2.0 | 4 votes |
/**
 * Computes the total byte volume of a table stored under the given path.
 *
 * @param systemConf Tajo configuration used to resolve the filesystem
 * @param tablePath root path of the table data
 * @return total length in bytes of everything under {@code tablePath}
 * @throws IOException if the filesystem cannot be queried
 */
public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
    return tablePath.getFileSystem(systemConf)
            .getContentSummary(tablePath)
            .getLength();
}
Example 13
Source File: Query.java From incubator-tajo with Apache License 2.0 | 4 votes |
/**
 * Computes the total byte volume of a table stored under the given path.
 *
 * @param systemConf Tajo configuration used to resolve the filesystem
 * @param tablePath root path of the table data
 * @return total length in bytes of everything under {@code tablePath}
 * @throws IOException if the filesystem cannot be queried
 */
private long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
    FileSystem fileSystem = tablePath.getFileSystem(systemConf);
    ContentSummary summary = fileSystem.getContentSummary(tablePath);
    return summary.getLength();
}