Java Code Examples for org.apache.hadoop.fs.Path#getFileSystem()
The following examples show how to use org.apache.hadoop.fs.Path#getFileSystem(), drawn from a variety of open-source projects. The source project, file, and license are noted above each example.
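Before the examples, here is a minimal usage sketch (the path name is illustrative, not taken from any project below): a Path is constructed, then asked for the FileSystem that backs it. The returned instance is determined by the path's scheme and authority, falling back to fs.defaultFS for scheme-less paths.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.txt"); // illustrative path
    // Resolve the FileSystem that owns this path's scheme and authority;
    // a scheme-less path falls back to fs.defaultFS.
    FileSystem fs = path.getFileSystem(conf);
    System.out.println("Backing file system: " + fs.getUri());
    System.out.println("Exists? " + fs.exists(path));
  }
}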
Example 1
Source File: TezCommonUtils.java From incubator-tez with Apache License 2.0
/**
 * <p>
 * This function returns the staging directory defined in the config with
 * property name <code>TezConfiguration.TEZ_AM_STAGING_DIR</code>. If the
 * property is not defined in the conf, Tez uses the value defined as
 * <code>TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT</code>. In addition, the
 * function makes sure the staging directory exists; if it does not, it
 * creates the directory with permission <code>TEZ_AM_DIR_PERMISSION</code>.
 * </p>
 *
 * @param conf TEZ configuration
 * @return Fully qualified staging directory
 */
public static Path getTezBaseStagingPath(Configuration conf) {
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
      TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT);
  Path baseStagingDir;
  try {
    Path p = new Path(stagingDirStr);
    FileSystem fs = p.getFileSystem(conf);
    if (!fs.exists(p)) {
      mkDirForAM(fs, p);
      LOG.info("Stage directory " + p + " doesn't exist and is created");
    }
    baseStagingDir = fs.resolvePath(p);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
  return baseStagingDir;
}
Example 2
Source File: FileOutputCommitter.java From hadoop-gpu with Apache License 2.0
public boolean needsTaskCommit(TaskAttemptContext context)
    throws IOException {
  try {
    Path taskOutputPath = getTempTaskOutputPath(context);
    if (taskOutputPath != null) {
      context.getProgressible().progress();
      // Get the file-system for the task output directory
      FileSystem fs = taskOutputPath.getFileSystem(context.getJobConf());
      // since task output path is created on demand,
      // if it exists, task needs a commit
      if (fs.exists(taskOutputPath)) {
        return true;
      }
    }
  } catch (IOException ioe) {
    throw ioe;
  }
  return false;
}
Example 3
Source File: TestCredentialProviderFactory.java From hadoop with Apache License 2.0
@Test
public void testJksProvider() throws Exception {
  Configuration conf = new Configuration();
  final Path jksPath = new Path(tmpDir.toString(), "test.jks");
  final String ourUrl =
      JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri();

  File file = new File(tmpDir, "test.jks");
  file.delete();
  conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, ourUrl);
  checkSpecificProvider(conf, ourUrl);
  Path path = ProviderUtils.unnestUri(new URI(ourUrl));
  FileSystem fs = path.getFileSystem(conf);
  FileStatus s = fs.getFileStatus(path);
  assertTrue(s.getPermission().toString().equals("rwx------"));
  assertTrue(file + " should exist", file.isFile());

  // check permission retention after explicit change
  fs.setPermission(path, new FsPermission("777"));
  checkPermissionRetention(conf, ourUrl, path);
}
Example 4
Source File: TeraOutputFormat.java From incubator-tez with Apache License 2.0
public RecordWriter<Text,Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
Example 5
Source File: FSDownload.java From big-c with Apache License 2.0
/**
 * Creates the cache loader for the status loading cache. This should be used
 * to create an instance of the status cache that is passed into the
 * FSDownload constructor.
 */
public static CacheLoader<Path,Future<FileStatus>>
    createStatusCacheLoader(final Configuration conf) {
  return new CacheLoader<Path,Future<FileStatus>>() {
    public Future<FileStatus> load(Path path) {
      try {
        FileSystem fs = path.getFileSystem(conf);
        return Futures.immediateFuture(fs.getFileStatus(path));
      } catch (Throwable th) {
        // report failures so it can be memoized
        return Futures.immediateFailedFuture(th);
      }
    }
  };
}
Example 6
Source File: TestParquetWriter.java From dremio-oss with Apache License 2.0
public void runTestAndValidate(String selection, String validationSelection,
    String inputTable, String outputFile, boolean sort) throws Exception {
  try {
    deleteTableIfExists(outputFile);
    test("use dfs_test");
    // test("ALTER SESSION SET \"planner.add_producer_consumer\" = false");
    String query = select(selection, inputTable, sort);
    System.out.println(outputFile);
    String create = "CREATE TABLE " + outputFile + " AS " + query;
    String validateQuery = select(validationSelection, outputFile, sort);
    test(create);
    test(validateQuery);

    // TODO: remove
    testBuilder()
        .unOrdered()
        .sqlQuery(validateQuery)
        .sqlBaselineQuery(query)
        .go();

    Configuration hadoopConf = new Configuration();
    Path output = new Path(getDfsTestTmpSchemaLocation(), outputFile);
    FileSystem fs = output.getFileSystem(hadoopConf);
    for (FileStatus file : fs.listStatus(output)) {
      ParquetMetadata footer =
          ParquetFileReader.readFooter(hadoopConf, file, SKIP_ROW_GROUPS);
      String version = footer.getFileMetaData()
          .getKeyValueMetaData().get(DREMIO_VERSION_PROPERTY);
      assertEquals(DremioVersionInfo.getVersion(), version);
      PageHeaderUtil.validatePageHeaders(file.getPath(), footer);
    }
  } finally {
    deleteTableIfExists(outputFile);
  }
}
Example 7
Source File: TestJoinDatamerge.java From hadoop with Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], IntWritable.class, IntWritable.class);
  }
  return out;
}
Example 8
Source File: MultiFileSplit.java From hadoop with Apache License 2.0
public String[] getLocations() throws IOException {
  HashSet<String> hostSet = new HashSet<String>();
  for (Path file : getPaths()) {
    FileSystem fs = file.getFileSystem(getJob());
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blkLocations =
        fs.getFileBlockLocations(status, 0, status.getLen());
    if (blkLocations != null && blkLocations.length > 0) {
      addToSet(hostSet, blkLocations[0].getHosts());
    }
  }
  return hostSet.toArray(new String[hostSet.size()]);
}
Example 9
Source File: TestDataJoin.java From big-c with Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
Example 10
Source File: MRHiveDictUtil.java From kylin-on-parquet-v2 with Apache License 2.0
private static long getFileSize(String hdfsUrl) throws IOException {
  Configuration configuration = new Configuration();
  Path path = new Path(hdfsUrl);
  FileSystem fs = path.getFileSystem(configuration);
  ContentSummary contentSummary = fs.getContentSummary(path);
  return contentSummary.getLength();
}
Example 11
Source File: DistRaid.java From RDFS with Apache License 2.0
public void cleanUp() {
  for (Codec codec : Codec.getCodecs()) {
    Path tmpdir = new Path(codec.tmpParityDirectory, this.getJobID());
    try {
      FileSystem fs = tmpdir.getFileSystem(jobconf);
      if (fs.exists(tmpdir)) {
        fs.delete(tmpdir, true);
      }
    } catch (IOException ioe) {
      LOG.error("Fail to delete " + tmpdir, ioe);
    }
  }
}
Example 12
Source File: AdmmIterationMapper.java From laser with Apache License 2.0
protected void setup(Context context) throws IOException,
    InterruptedException {
  conf = context.getConfiguration();
  iteration = Integer.parseInt(conf.get("iteration.number"));
  addIntercept = conf.getBoolean("add.intercept", false);
  rho = conf.getFloat("rho", DEFAULT_RHO);
  regularizationFactor = conf.getFloat("regularization.factor",
      DEFAULT_REGULARIZATION_FACTOR);
  previousIntermediateOutputLocation = conf
      .get("previous.intermediate.output.location");
  previousIntermediateOutputLocationPath = new Path(
      previousIntermediateOutputLocation);

  try {
    fs = previousIntermediateOutputLocationPath.getFileSystem(conf);
  } catch (IOException e) {
    LOG.info(e.toString());
  }

  lbfgs = new QNMinimizer();

  FileSplit split = (FileSplit) context.getInputSplit();
  splitId = split.getPath() + ":" + Long.toString(split.getStart()) + " - "
      + Long.toString(split.getLength());
  splitId = removeIpFromHdfsFileName(splitId);

  inputSplitData = new LinkedList<Vector>();
}
Example 13
Source File: ScanPerformanceEvaluation.java From hbase with Apache License 2.0
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  Path rootDir;
  try {
    rootDir = CommonFSUtils.getRootDir(conf);
    rootDir.getFileSystem(conf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
Example 14
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0
public int run(String[] argv) throws Exception {
  JobConf job = new JobConf(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
      null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path tmpDir = new Path(jClient.getFs().getHomeDirectory(), ".staging");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set(
        org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
        indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return 0;
}
Example 15
Source File: Cluster.java From spork with Apache License 2.0
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
Example 16
Source File: DelimitedTextInputFormat.java From marklogic-contentpump with Apache License 2.0
public List<InputSplit> getSplits(JobContext job) throws IOException {
  boolean delimSplit = isSplitInput(job.getConfiguration());
  // if delimSplit is true, size of each split is determined by
  // Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
  List<InputSplit> splits = super.getSplits(job);
  if (!delimSplit) {
    return splits;
  }

  if (splits.size() >= SPLIT_COUNT_LIMIT) {
    // if #splits > 1 million, there is enough parallelism
    // therefore no point to split
    LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" +
        SPLIT_COUNT_LIMIT);
    DefaultStringifier.store(job.getConfiguration(), false,
        ConfigConstants.CONF_SPLIT_INPUT);
    return splits;
  }

  // add header info into splits
  List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
  LOG.info(splits.size() + " DelimitedSplits generated");
  Configuration conf = job.getConfiguration();
  char delimiter = 0;
  ArrayList<Text> hlist = new ArrayList<Text>();
  for (InputSplit file : splits) {
    FileSplit fsplit = ((FileSplit) file);
    Path path = fsplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    if (fsplit.getStart() == 0) {
      // parse the inSplit, get the header
      FSDataInputStream fileIn = fs.open(path);
      String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
          ConfigConstants.DEFAULT_DELIMITER);
      if (delimStr.length() == 1) {
        delimiter = delimStr.charAt(0);
      } else {
        LOG.error("Incorrect delimitor: " + delimiter +
            ". Expects single character.");
      }
      String encoding = conf.get(
          MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
          MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
      InputStreamReader instream = new InputStreamReader(fileIn, encoding);
      CSVParser parser = new CSVParser(instream,
          CSVParserFormatter.getFormat(delimiter,
              DelimitedTextReader.encapsulator, true, true));
      Iterator<CSVRecord> it = parser.iterator();
      String[] header = null;
      if (it.hasNext()) {
        CSVRecord record = (CSVRecord) it.next();
        Iterator<String> recordIterator = record.iterator();
        int recordSize = record.size();
        header = new String[recordSize];
        for (int i = 0; i < recordSize; i++) {
          if (recordIterator.hasNext()) {
            header[i] = (String) recordIterator.next();
          } else {
            throw new IOException("Record size doesn't match the real size");
          }
        }
        EncodingUtil.handleBOMUTF8(header, 0);
        hlist.clear();
        for (String s : header) {
          hlist.add(new Text(s));
        }
      }
      instream.close();
    }
    DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
        hlist.toArray(new Text[hlist.size()])), path, fsplit.getStart(),
        fsplit.getLength(), fsplit.getLocations());
    populatedSplits.add(ds);
  }
  return populatedSplits;
}
Example 17
Source File: TajoMasterClientService.java From tajo with Apache License 2.0
@Override
public TableResponse createExternalTable(RpcController controller,
    CreateTableRequest request) throws ServiceException {
  try {
    Session session = context.getSessionManager()
        .getSession(request.getSessionId().getId());
    QueryContext queryContext = new QueryContext(conf, session);

    Path path = new Path(request.getPath());
    FileSystem fs = path.getFileSystem(conf);

    if (!fs.exists(path)) {
      throw new UnavailableTableLocationException(path.toString(),
          "no such a directory");
    }

    Schema schema = null;
    if (request.hasSchema()) {
      schema = SchemaFactory.newV1(request.getSchema());
    }

    TableMeta meta = new TableMeta(request.getMeta());
    PartitionMethodDesc partitionDesc = null;
    if (request.hasPartition()) {
      partitionDesc = new PartitionMethodDesc(request.getPartition());
    }

    TableDesc desc = context.getGlobalEngine().getDDLExecutor()
        .getCreateTableExecutor().create(queryContext, request.getName(),
            null, schema, meta, path.toUri(), true, partitionDesc, false);

    return TableResponse.newBuilder()
        .setState(OK)
        .setTable(desc.getProto()).build();
  } catch (Throwable t) {
    printStackTraceIfError(LOG, t);
    return TableResponse.newBuilder()
        .setState(returnError(t))
        .build();
  }
}
Example 18
Source File: CustomOutputCommitter.java From hadoop with Apache License 2.0
private void writeFile(JobConf conf, String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}
Example 19
Source File: DistCpSync.java From big-c with Apache License 2.0
static boolean sync(DistCpOptions inputOptions, Configuration conf)
    throws IOException {
  List<Path> sourcePaths = inputOptions.getSourcePaths();
  if (sourcePaths.size() != 1) {
    // we only support one source dir which must be a snapshottable directory
    throw new IllegalArgumentException(sourcePaths.size() +
        " source paths are provided");
  }
  final Path sourceDir = sourcePaths.get(0);
  final Path targetDir = inputOptions.getTargetPath();

  final FileSystem sfs = sourceDir.getFileSystem(conf);
  final FileSystem tfs = targetDir.getFileSystem(conf);
  // currently we require both the source and the target file system are
  // DistributedFileSystem.
  if (!(sfs instanceof DistributedFileSystem) ||
      !(tfs instanceof DistributedFileSystem)) {
    throw new IllegalArgumentException("The FileSystems needs to" +
        " be DistributedFileSystem for using snapshot-diff-based distcp");
  }
  final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
  final DistributedFileSystem targetFs = (DistributedFileSystem) tfs;

  // make sure targetFS has no change between from and the current states
  if (!checkNoChange(inputOptions, targetFs, targetDir)) {
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(
        getSourceSnapshotPath(sourceDir, inputOptions.getToSnapshot())));
    return false;
  }

  Path tmpDir = null;
  try {
    tmpDir = createTargetTmpDir(targetFs, targetDir);
    DiffInfo[] diffs = getDiffs(inputOptions, sourceFs, sourceDir, targetDir);
    if (diffs == null) {
      return false;
    }
    // do the real sync work: deletion and rename
    syncDiff(diffs, targetFs, tmpDir);
    return true;
  } catch (Exception e) {
    DistCp.LOG.warn("Failed to use snapshot diff for distcp", e);
    return false;
  } finally {
    deleteTargetTmpDir(targetFs, tmpDir);
    // TODO: since we have tmp directory, we can support "undo" with failures
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(
        getSourceSnapshotPath(sourceDir, inputOptions.getToSnapshot())));
  }
}
Example 20
Source File: HBCKFsUtils.java From hbase-operator-tools with Apache License 2.0
/**
 * COPIED from CommonFSUtils.getRootDir
 *
 * @param c configuration
 * @return {@link Path} to hbase root directory from configuration as a
 *         qualified Path.
 * @throws IOException e
 */
public static Path getRootDir(final Configuration c) throws IOException {
  Path p = new Path(c.get(HConstants.HBASE_DIR));
  FileSystem fs = p.getFileSystem(c);
  return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}
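A closing note on semantics (a sketch, not taken from any of the projects above): Path#getFileSystem(Configuration) delegates to FileSystem.get(path.toUri(), conf), so the path's scheme and authority select the FileSystem implementation, and fs.defaultFS fills in whatever is missing. The host name below is a placeholder:

Configuration conf = new Configuration();
// An explicit hdfs:// scheme resolves to DistributedFileSystem
// (assuming an HDFS client is on the classpath).
FileSystem hdfs = new Path("hdfs://namenode:8020/data").getFileSystem(conf);
// file:// resolves to the local file system.
FileSystem local = new Path("file:///tmp/data").getFileSystem(conf);
// A scheme-less path falls back to fs.defaultFS.
FileSystem dflt = new Path("/data").getFileSystem(conf);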