Java Code Examples for org.apache.hadoop.fs.Path#getFileSystem()
The following examples show how to use org.apache.hadoop.fs.Path#getFileSystem(), drawn from a variety of open-source projects. The source project, file, and license are noted above each example.
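Before the examples, here is a minimal usage sketch (the path name is illustrative, not taken from any project below): a Path is constructed, then asked for the FileSystem that backs it. The returned instance is determined by the path's scheme and authority, falling back to fs.defaultFS for scheme-less paths.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.txt"); // illustrative path
    // Resolve the FileSystem that owns this path's scheme and authority;
    // a scheme-less path falls back to fs.defaultFS.
    FileSystem fs = path.getFileSystem(conf);
    System.out.println("Backing file system: " + fs.getUri());
    System.out.println("Exists? " + fs.exists(path));
  }
}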
Example 1
Source File: TezCommonUtils.java From incubator-tez with Apache License 2.0
/**
 * <p>
 * This function returns the staging directory defined in the config with
 * property name <code>TezConfiguration.TEZ_AM_STAGING_DIR</code>. If the
 * property is not defined in the conf, Tez uses the value defined as
 * <code>TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT</code>. In addition, the
 * function makes sure the staging directory exists; if it does not, it
 * creates the directory with permission <code>TEZ_AM_DIR_PERMISSION</code>.
 * </p>
 *
 * @param conf TEZ configuration
 * @return Fully qualified staging directory
 */
public static Path getTezBaseStagingPath(Configuration conf) {
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
      TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT);
  Path baseStagingDir;
  try {
    Path p = new Path(stagingDirStr);
    FileSystem fs = p.getFileSystem(conf);
    if (!fs.exists(p)) {
      mkDirForAM(fs, p);
      LOG.info("Stage directory " + p + " doesn't exist and is created");
    }
    baseStagingDir = fs.resolvePath(p);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
  return baseStagingDir;
}
Example 2
Source File: FileOutputCommitter.java From hadoop-gpu with Apache License 2.0
public boolean needsTaskCommit(TaskAttemptContext context)
    throws IOException {
  try {
    Path taskOutputPath = getTempTaskOutputPath(context);
    if (taskOutputPath != null) {
      context.getProgressible().progress();
      // Get the file-system for the task output directory
      FileSystem fs = taskOutputPath.getFileSystem(context.getJobConf());
      // since task output path is created on demand,
      // if it exists, task needs a commit
      if (fs.exists(taskOutputPath)) {
        return true;
      }
    }
  } catch (IOException ioe) {
    throw ioe;
  }
  return false;
}
Example 3
Source File: TestCredentialProviderFactory.java From hadoop with Apache License 2.0
@Test
public void testJksProvider() throws Exception {
  Configuration conf = new Configuration();
  final Path jksPath = new Path(tmpDir.toString(), "test.jks");
  final String ourUrl =
      JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri();

  File file = new File(tmpDir, "test.jks");
  file.delete();
  conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, ourUrl);
  checkSpecificProvider(conf, ourUrl);
  Path path = ProviderUtils.unnestUri(new URI(ourUrl));
  FileSystem fs = path.getFileSystem(conf);
  FileStatus s = fs.getFileStatus(path);
  assertTrue(s.getPermission().toString().equals("rwx------"));
  assertTrue(file + " should exist", file.isFile());

  // check permission retention after explicit change
  fs.setPermission(path, new FsPermission("777"));
  checkPermissionRetention(conf, ourUrl, path);
}
Example 4
Source File: TeraOutputFormat.java From incubator-tez with Apache License 2.0
public RecordWriter<Text,Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
Example 5
Source File: FSDownload.java From big-c with Apache License 2.0
/**
 * Creates the cache loader for the status loading cache. This should be used
 * to create an instance of the status cache that is passed into the
 * FSDownload constructor.
 */
public static CacheLoader<Path,Future<FileStatus>>
    createStatusCacheLoader(final Configuration conf) {
  return new CacheLoader<Path,Future<FileStatus>>() {
    public Future<FileStatus> load(Path path) {
      try {
        FileSystem fs = path.getFileSystem(conf);
        return Futures.immediateFuture(fs.getFileStatus(path));
      } catch (Throwable th) {
        // report failures so it can be memoized
        return Futures.immediateFailedFuture(th);
      }
    }
  };
}
Example 6
Source File: TestParquetWriter.java From dremio-oss with Apache License 2.0
public void runTestAndValidate(String selection, String validationSelection,
    String inputTable, String outputFile, boolean sort) throws Exception {
  try {
    deleteTableIfExists(outputFile);
    test("use dfs_test");
    // test("ALTER SESSION SET \"planner.add_producer_consumer\" = false");
    String query = select(selection, inputTable, sort);
    System.out.println(outputFile);
    String create = "CREATE TABLE " + outputFile + " AS " + query;
    String validateQuery = select(validationSelection, outputFile, sort);
    test(create);
    test(validateQuery);

    // TODO: remove
    testBuilder()
        .unOrdered()
        .sqlQuery(validateQuery)
        .sqlBaselineQuery(query)
        .go();

    Configuration hadoopConf = new Configuration();
    Path output = new Path(getDfsTestTmpSchemaLocation(), outputFile);
    FileSystem fs = output.getFileSystem(hadoopConf);
    for (FileStatus file : fs.listStatus(output)) {
      ParquetMetadata footer =
          ParquetFileReader.readFooter(hadoopConf, file, SKIP_ROW_GROUPS);
      String version = footer.getFileMetaData()
          .getKeyValueMetaData().get(DREMIO_VERSION_PROPERTY);
      assertEquals(DremioVersionInfo.getVersion(), version);
      PageHeaderUtil.validatePageHeaders(file.getPath(), footer);
    }
  } finally {
    deleteTableIfExists(outputFile);
  }
}
Example 7
Source File: TestJoinDatamerge.java From hadoop with Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], IntWritable.class, IntWritable.class);
  }
  return out;
}
Example 8
Source File: MultiFileSplit.java From hadoop with Apache License 2.0
public String[] getLocations() throws IOException {
  HashSet<String> hostSet = new HashSet<String>();
  for (Path file : getPaths()) {
    FileSystem fs = file.getFileSystem(getJob());
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blkLocations =
        fs.getFileBlockLocations(status, 0, status.getLen());
    if (blkLocations != null && blkLocations.length > 0) {
      addToSet(hostSet, blkLocations[0].getHosts());
    }
  }
  return hostSet.toArray(new String[hostSet.size()]);
}
Example 9
Source File: TestDataJoin.java From big-c with Apache License 2.0
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
Example 10
Source File: MRHiveDictUtil.java From kylin-on-parquet-v2 with Apache License 2.0
private static long getFileSize(String hdfsUrl) throws IOException {
  Configuration configuration = new Configuration();
  Path path = new Path(hdfsUrl);
  FileSystem fs = path.getFileSystem(configuration);
  ContentSummary contentSummary = fs.getContentSummary(path);
  return contentSummary.getLength();
}
Example 11
Source File: DistRaid.java From RDFS with Apache License 2.0
public void cleanUp() {
  for (Codec codec : Codec.getCodecs()) {
    Path tmpdir = new Path(codec.tmpParityDirectory, this.getJobID());
    try {
      FileSystem fs = tmpdir.getFileSystem(jobconf);
      if (fs.exists(tmpdir)) {
        fs.delete(tmpdir, true);
      }
    } catch (IOException ioe) {
      LOG.error("Fail to delete " + tmpdir, ioe);
    }
  }
}
Example 12
Source File: AdmmIterationMapper.java From laser with Apache License 2.0
protected void setup(Context context) throws IOException,
    InterruptedException {
  conf = context.getConfiguration();
  iteration = Integer.parseInt(conf.get("iteration.number"));
  addIntercept = conf.getBoolean("add.intercept", false);
  rho = conf.getFloat("rho", DEFAULT_RHO);
  regularizationFactor = conf.getFloat("regularization.factor",
      DEFAULT_REGULARIZATION_FACTOR);
  previousIntermediateOutputLocation = conf
      .get("previous.intermediate.output.location");
  previousIntermediateOutputLocationPath = new Path(
      previousIntermediateOutputLocation);

  try {
    fs = previousIntermediateOutputLocationPath.getFileSystem(conf);
  } catch (IOException e) {
    LOG.info(e.toString());
  }

  lbfgs = new QNMinimizer();

  FileSplit split = (FileSplit) context.getInputSplit();
  splitId = split.getPath() + ":" + Long.toString(split.getStart()) + " - "
      + Long.toString(split.getLength());
  splitId = removeIpFromHdfsFileName(splitId);

  inputSplitData = new LinkedList<Vector>();
}
Example 13
Source File: ScanPerformanceEvaluation.java From hbase with Apache License 2.0
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  Path rootDir;
  try {
    rootDir = CommonFSUtils.getRootDir(conf);
    rootDir.getFileSystem(conf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
Example 14
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0
public int run(String[] argv) throws Exception {
  JobConf job = new JobConf(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
      null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path tmpDir = new Path(jClient.getFs().getHomeDirectory(), ".staging");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set(
        org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
        indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return 0;
}
Example 15
Source File: Cluster.java From spork with Apache License 2.0
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
Example 16
Source File: DelimitedTextInputFormat.java From marklogic-contentpump with Apache License 2.0
public List<InputSplit> getSplits(JobContext job) throws IOException {
  boolean delimSplit = isSplitInput(job.getConfiguration());
  // if delimSplit is true, size of each split is determined by
  // Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
  List<InputSplit> splits = super.getSplits(job);
  if (!delimSplit) {
    return splits;
  }

  if (splits.size() >= SPLIT_COUNT_LIMIT) {
    // if #splits > 1 million, there is enough parallelism
    // therefore no point to split
    LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" +
        SPLIT_COUNT_LIMIT);
    DefaultStringifier.store(job.getConfiguration(), false,
        ConfigConstants.CONF_SPLIT_INPUT);
    return splits;
  }

  // add header info into splits
  List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
  LOG.info(splits.size() + " DelimitedSplits generated");
  Configuration conf = job.getConfiguration();
  char delimiter = 0;
  ArrayList<Text> hlist = new ArrayList<Text>();
  for (InputSplit file : splits) {
    FileSplit fsplit = ((FileSplit) file);
    Path path = fsplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    if (fsplit.getStart() == 0) {
      // parse the inSplit, get the header
      FSDataInputStream fileIn = fs.open(path);
      String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
          ConfigConstants.DEFAULT_DELIMITER);
      if (delimStr.length() == 1) {
        delimiter = delimStr.charAt(0);
      } else {
        LOG.error("Incorrect delimitor: " + delimiter +
            ". Expects single character.");
      }
      String encoding = conf.get(
          MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
          MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
      InputStreamReader instream = new InputStreamReader(fileIn, encoding);
      CSVParser parser = new CSVParser(instream,
          CSVParserFormatter.getFormat(delimiter,
              DelimitedTextReader.encapsulator, true, true));
      Iterator<CSVRecord> it = parser.iterator();
      String[] header = null;
      if (it.hasNext()) {
        CSVRecord record = (CSVRecord) it.next();
        Iterator<String> recordIterator = record.iterator();
        int recordSize = record.size();
        header = new String[recordSize];
        for (int i = 0; i < recordSize; i++) {
          if (recordIterator.hasNext()) {
            header[i] = (String) recordIterator.next();
          } else {
            throw new IOException("Record size doesn't match the real size");
          }
        }
        EncodingUtil.handleBOMUTF8(header, 0);
        hlist.clear();
        for (String s : header) {
          hlist.add(new Text(s));
        }
      }
      instream.close();
    }
    DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
        hlist.toArray(new Text[hlist.size()])), path, fsplit.getStart(),
        fsplit.getLength(), fsplit.getLocations());
    populatedSplits.add(ds);
  }
  return populatedSplits;
}
Example 17
Source File: TajoMasterClientService.java From tajo with Apache License 2.0
@Override
public TableResponse createExternalTable(RpcController controller,
    CreateTableRequest request) throws ServiceException {
  try {
    Session session = context.getSessionManager()
        .getSession(request.getSessionId().getId());
    QueryContext queryContext = new QueryContext(conf, session);

    Path path = new Path(request.getPath());
    FileSystem fs = path.getFileSystem(conf);

    if (!fs.exists(path)) {
      throw new UnavailableTableLocationException(path.toString(),
          "no such a directory");
    }

    Schema schema = null;
    if (request.hasSchema()) {
      schema = SchemaFactory.newV1(request.getSchema());
    }

    TableMeta meta = new TableMeta(request.getMeta());
    PartitionMethodDesc partitionDesc = null;
    if (request.hasPartition()) {
      partitionDesc = new PartitionMethodDesc(request.getPartition());
    }

    TableDesc desc = context.getGlobalEngine().getDDLExecutor()
        .getCreateTableExecutor().create(queryContext, request.getName(),
            null, schema, meta, path.toUri(), true, partitionDesc, false);

    return TableResponse.newBuilder()
        .setState(OK)
        .setTable(desc.getProto()).build();
  } catch (Throwable t) {
    printStackTraceIfError(LOG, t);
    return TableResponse.newBuilder()
        .setState(returnError(t))
        .build();
  }
}
Example 18
Source File: CustomOutputCommitter.java From hadoop with Apache License 2.0
private void writeFile(JobConf conf, String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}
Example 19
Source File: DistCpSync.java From big-c with Apache License 2.0
static boolean sync(DistCpOptions inputOptions, Configuration conf)
    throws IOException {
  List<Path> sourcePaths = inputOptions.getSourcePaths();
  if (sourcePaths.size() != 1) {
    // we only support one source dir which must be a snapshottable directory
    throw new IllegalArgumentException(sourcePaths.size() +
        " source paths are provided");
  }
  final Path sourceDir = sourcePaths.get(0);
  final Path targetDir = inputOptions.getTargetPath();

  final FileSystem sfs = sourceDir.getFileSystem(conf);
  final FileSystem tfs = targetDir.getFileSystem(conf);
  // currently we require both the source and the target file system are
  // DistributedFileSystem.
  if (!(sfs instanceof DistributedFileSystem) ||
      !(tfs instanceof DistributedFileSystem)) {
    throw new IllegalArgumentException("The FileSystems needs to" +
        " be DistributedFileSystem for using snapshot-diff-based distcp");
  }
  final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
  final DistributedFileSystem targetFs = (DistributedFileSystem) tfs;

  // make sure targetFS has no change between from and the current states
  if (!checkNoChange(inputOptions, targetFs, targetDir)) {
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(
        getSourceSnapshotPath(sourceDir, inputOptions.getToSnapshot())));
    return false;
  }

  Path tmpDir = null;
  try {
    tmpDir = createTargetTmpDir(targetFs, targetDir);
    DiffInfo[] diffs = getDiffs(inputOptions, sourceFs, sourceDir, targetDir);
    if (diffs == null) {
      return false;
    }
    // do the real sync work: deletion and rename
    syncDiff(diffs, targetFs, tmpDir);
    return true;
  } catch (Exception e) {
    DistCp.LOG.warn("Failed to use snapshot diff for distcp", e);
    return false;
  } finally {
    deleteTargetTmpDir(targetFs, tmpDir);
    // TODO: since we have tmp directory, we can support "undo" with failures
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(
        getSourceSnapshotPath(sourceDir, inputOptions.getToSnapshot())));
  }
}
Example 20
Source File: HBCKFsUtils.java From hbase-operator-tools with Apache License 2.0
/**
 * COPIED from CommonFSUtils.getRootDir
 *
 * @param c configuration
 * @return {@link Path} to hbase root directory from configuration as a
 *         qualified Path.
 * @throws IOException e
 */
public static Path getRootDir(final Configuration c) throws IOException {
  Path p = new Path(c.get(HConstants.HBASE_DIR));
  FileSystem fs = p.getFileSystem(c);
  return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}
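A closing note on semantics (a sketch, not taken from any of the projects above): Path#getFileSystem(Configuration) delegates to FileSystem.get(path.toUri(), conf), so the path's scheme and authority select the FileSystem implementation, and fs.defaultFS fills in whatever is missing. The host name below is a placeholder:

Configuration conf = new Configuration();
// An explicit hdfs:// scheme resolves to DistributedFileSystem
// (assuming an HDFS client is on the classpath).
FileSystem hdfs = new Path("hdfs://namenode:8020/data").getFileSystem(conf);
// file:// resolves to the local file system.
FileSystem local = new Path("file:///tmp/data").getFileSystem(conf);
// A scheme-less path falls back to fs.defaultFS.
FileSystem dflt = new Path("/data").getFileSystem(conf);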