Java Code Examples for org.apache.hadoop.fs.FileSystem#exists()
The following examples show how to use
org.apache.hadoop.fs.FileSystem#exists().
The examples are drawn from open-source projects; the source file, project, and license are noted above each example.
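Before the project examples, here is a minimal sketch of the pattern most of them share: call exists() on a path before creating, deleting, or reading it. The paths and configuration below are hypothetical placeholders, not taken from any of the projects. Note that exists() is a convenience check (it returns false when getFileStatus() reports the path missing), so a separate exists() call costs an extra round trip to the filesystem and is not atomic with the operation that follows.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsGuardExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Typical guard: remove a stale output directory before re-running a job.
    // The path is a placeholder for illustration only.
    Path outPath = new Path("/tmp/exists-demo/output");
    if (fs.exists(outPath)) {
      fs.delete(outPath, true);   // recursive delete
    }

    // Typical guard: create a directory only if it is not already there.
    Path cacheDir = new Path("/tmp/exists-demo/cache");
    if (!fs.exists(cacheDir)) {
      fs.mkdirs(cacheDir);
    }
  }
}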
Example 1
Source File: TestMiniMRClientCluster.java From big-c with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  final Configuration conf = new Configuration();
  final Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp"));
  testdir = new Path(TEST_ROOT_DIR, "TestMiniMRClientCluster");
  inDir = new Path(testdir, "in");
  outDir = new Path(testdir, "out");

  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(testdir) && !fs.delete(testdir, true)) {
    throw new IOException("Could not delete " + testdir);
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir);
  }

  for (int i = 0; i < inFiles.length; i++) {
    inFiles[i] = new Path(inDir, "part_" + i);
    createFile(inFiles[i], conf);
  }

  // create the mini cluster to be used for the tests
  mrCluster = MiniMRClientClusterFactory.create(InternalClass.class, 1, new Configuration());
}
Example 2
Source File: MoveHDFS.java From nifi with Apache License 2.0
protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
    throws IOException {
  if (null == filesVisited) {
    filesVisited = new HashSet<>();
  }

  if (!hdfs.exists(inputPath)) {
    throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
  }

  final Set<Path> files = new HashSet<>();

  FileStatus inputStatus = hdfs.getFileStatus(inputPath);
  if (inputStatus.isDirectory()) {
    for (final FileStatus file : hdfs.listStatus(inputPath)) {
      final Path canonicalFile = file.getPath();
      if (!filesVisited.add(canonicalFile)) {
        // skip files we've already seen (may be looping directory links)
        continue;
      }
      if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
        files.add(canonicalFile);
        if (getLogger().isDebugEnabled()) {
          getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
        }
      }
    }
  } else if (inputStatus.isFile()) {
    files.add(inputPath);
  }
  return files;
}
Example 3
Source File: SnapshotManager.java From hbase with Apache License 2.0
/**
 * Check to see if the snapshot is one of the currently completed snapshots
 * Returns true if the snapshot exists in the "completed snapshots folder".
 *
 * @param snapshot expected snapshot to check
 * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
 *         not stored
 * @throws IOException if the filesystem throws an unexpected exception,
 * @throws IllegalArgumentException if snapshot name is invalid.
 */
private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
  try {
    final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    // check to see if the snapshot already exists
    return fs.exists(snapshotDir);
  } catch (IllegalArgumentException iae) {
    throw new UnknownSnapshotException("Unexpected exception thrown", iae);
  }
}
Example 4
Source File: TestEmptyJob.java From RDFS with Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
Example 5
Source File: TestPseudoLocalFs.java From big-c with Apache License 2.0
/**
 * Validate if exists() returns <code>true</code> for correctly formed file
 * paths on PseudoLocalFs and returns <code>false</code> for improperly
 * formed file paths.
 * @param pfs Pseudo Local File System
 * @param path file path for which exists() is to be called
 * @param shouldSucceed expected return value of exists(<path>)
 * @throws IOException
 */
private void validateExists(FileSystem pfs, Path path, boolean shouldSucceed) throws IOException {
  boolean ret = pfs.exists(path);
  if (shouldSucceed) {
    assertTrue("exists() returned false for valid file name " + path, ret);
  } else {
    assertFalse("exists() returned true for invalid file name " + path, ret);
  }
}
Example 6
Source File: TestTFileSeek.java From hadoop-gpu with Apache License 2.0
private static FSDataOutputStream createFSOutput(Path name, FileSystem fs) throws IOException {
  if (fs.exists(name)) {
    fs.delete(name, true);
  }
  FSDataOutputStream fout = fs.create(name);
  return fout;
}
Example 7
Source File: HiveRegisterStep.java From incubator-gobblin with Apache License 2.0
@Override
public void execute() throws IOException {
  if (this.verifyBeforeRegistering) {
    if (!this.hiveSpec.getTable().getLocation().isPresent()) {
      throw getException("Table does not have a location parameter.");
    }
    Path tablePath = new Path(this.hiveSpec.getTable().getLocation().get());
    FileSystem fs = this.hiveSpec.getPath().getFileSystem(new Configuration());
    if (!fs.exists(tablePath)) {
      throw getException(String.format("Table location %s does not exist.", tablePath));
    }
    if (this.hiveSpec.getPartition().isPresent()) {
      if (!this.hiveSpec.getPartition().get().getLocation().isPresent()) {
        throw getException("Partition does not have a location parameter.");
      }
      Path partitionPath = new Path(this.hiveSpec.getPartition().get().getLocation().get());
      if (!fs.exists(this.hiveSpec.getPath())) {
        throw getException(String.format("Partition location %s does not exist.", partitionPath));
      }
    }
  }

  try (HiveRegister hiveRegister = HiveRegister.get(this.props, this.metastoreURI)) {
    log.info("Registering Hive Spec " + this.hiveSpec);
    ListenableFuture<Void> future = hiveRegister.register(this.hiveSpec);
    future.get();
  } catch (InterruptedException | ExecutionException ie) {
    throw new IOException("Hive registration was interrupted.", ie);
  }
}
Example 8
Source File: GorillaStore.java From timely with Apache License 2.0
protected void writeCompressor(String metric, WrappedGorillaCompressor wrappedGorillaCompressor)
    throws IOException {
  try {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(new URI("hdfs://localhost:8020"), configuration);
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd-HHmmss.SSS");
    sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
    String baseDir = "/timely/cache";
    Path directory = new Path(baseDir + "/" + metric);
    String fileName = metric + "-" + sdf.format(new Date(wrappedGorillaCompressor.getOldestTimestamp()));
    Path outputPath = new Path(directory, fileName);
    if (!fs.exists(directory)) {
      fs.mkdirs(directory);
    }
    if (fs.exists(outputPath)) {
      throw new IOException("output path exists");
    }
    OutputStream os = fs.create(outputPath);
    // write object to hdfs file
    ObjectOutputStream oos = new ObjectOutputStream(os);
    oos.writeObject(wrappedGorillaCompressor);
    oos.close();
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }
}
Example 9
Source File: LateFileRecordCountProvider.java From incubator-gobblin with Apache License 2.0
/**
 * Construct filename for a late file. If the file does not exist in the output dir, retain the original name.
 * Otherwise, append a LATE_COMPONENT{RandomInteger} to the original file name.
 * For example, if file "part1.123.avro" exists in dir "/a/b/", the returned path will be "/a/b/part1.123.late12345.avro".
 */
public Path constructLateFilePath(String originalFilename, FileSystem fs, Path outputDir) throws IOException {
  if (!fs.exists(new Path(outputDir, originalFilename))) {
    return new Path(outputDir, originalFilename);
  }
  return constructLateFilePath(
      FilenameUtils.getBaseName(originalFilename) + LATE_COMPONENT + new Random().nextInt(Integer.MAX_VALUE)
          + SEPARATOR + FilenameUtils.getExtension(originalFilename),
      fs, outputDir);
}
Example 10
Source File: CommonFriendStep2.java From BigData-In-Practice with Apache License 2.0
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJarByClass(CommonFriendStep2.class);

  // set the job's mapper and reducer classes
  job.setMapperClass(CommonFansStep2Mapper.class);
  job.setReducerClass(CommonFansStep2Reducer.class);

  // set the key/value output types for the map phase
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  // set the key/value output types for the reduce phase
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // if the output directory already exists, delete it so repeated test runs
  // don't require removing it by hand
  FileSystem fs = FileSystem.get(conf);
  Path out = new Path(args[1]);
  if (fs.exists(out)) {
    fs.delete(out, true);
  }

  // set the input and output paths
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, out);

  // submit the job to YARN or the local runner
  job.waitForCompletion(true);
}
Example 11
Source File: SchemaHandler.java From Bats with Apache License 2.0
/**
 * If raw schema was present in create schema command, returns schema from command,
 * otherwise loads raw schema from the given file.
 *
 * @param sqlCall sql create schema call
 * @return string representation of raw schema (column names, types and nullability)
 */
private String getSchemaString(SqlSchema.Create sqlCall) {
  if (sqlCall.hasSchema()) {
    return sqlCall.getSchema();
  }

  Path path = new Path(sqlCall.getLoad());
  try {
    FileSystem rawFs = path.getFileSystem(new Configuration());
    FileSystem fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), rawFs.getConf());

    if (!fs.exists(path)) {
      throw UserException.resourceError()
          .message("File with raw schema [%s] does not exist", path.toUri().getPath())
          .build(logger);
    }

    try (InputStream stream = fs.open(path)) {
      return IOUtils.toString(stream);
    }
  } catch (IOException e) {
    throw UserException.resourceError(e)
        .message("Unable to load raw schema from file %s", path.toUri().getPath())
        .build(logger);
  }
}
Example 12
Source File: FileUtils.java From sqoop-on-spark with Apache License 2.0
public static void mkdirs(String directory) throws IOException {
  Path path = new Path(directory);
  FileSystem fs = path.getFileSystem(new Configuration());
  if (!fs.exists(path)) {
    fs.mkdirs(path);
  }
}
Example 13
Source File: TestMapperReducerCleanup.java From hadoop with Apache License 2.0
@Test
public void testReduceCleanup() throws Exception {
  reset();

  Job job = Job.getInstance();

  Path inputPath = createInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(TrackingTokenizerMapper.class);
  job.setReducerClass(FailingReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  job.setOutputFormatClass(TrackingTextOutputFormat.class);
  job.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.waitForCompletion(true);

  Assert.assertTrue(mapCleanup);
  Assert.assertTrue(reduceCleanup);
  Assert.assertTrue(recordReaderCleanup);
  Assert.assertTrue(recordWriterCleanup);
}
Example 14
Source File: PVMinMax2.java From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {
  // 1. HDFS configuration
  String namenode_ip = "192.168.17.10";
  String hdfs = "hdfs://" + namenode_ip + ":9000";
  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", hdfs);
  conf.set("mapreduce.app-submission.cross-platform", "true");

  // 2. MapReduce job configuration
  String jobName = "PVMinMax2";                    // job name
  Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(PVMinMax2.class);              // job class to run
  job.setJar("export\\PVMinMax2.jar");             // local jar package
  job.setMapperClass(PVMinMax2Mapper.class);       // Mapper class
  job.setMapOutputKeyClass(Text.class);            // Mapper output key type
  job.setMapOutputValueClass(Text.class);          // Mapper output value type
  job.setReducerClass(PVMinMax2Reducer.class);     // Reducer class
  job.setOutputKeyClass(Text.class);               // Reducer output key type
  job.setOutputValueClass(Text.class);             // Reducer output value type

  // 3. job input and output paths
  String dataDir = "/expr/weblog/output5_1";       // input data directory
  String outputDir = "/expr/weblog/output5_2";     // output directory
  Path inPath = new Path(hdfs + dataDir);
  Path outPath = new Path(hdfs + outputDir);
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outPath);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outPath)) {
    fs.delete(outPath, true);
  }

  // 4. run the job
  System.out.println("Job: " + jobName + " is running...");
  if (job.waitForCompletion(true)) {
    System.out.println("success!");
    System.exit(0);
  } else {
    System.out.println("failed!");
    System.exit(1);
  }
}
Example 15
Source File: MergeStatisticsWithOldStep.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
  final CubeManager mgr = CubeManager.getInstance(context.getConfig());
  final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
  final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

  CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
  Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

  KylinConfig kylinConf = cube.getConfig();
  Configuration conf = HadoopUtil.getCurrentConfiguration();
  ResourceStore rs = ResourceStore.getStore(kylinConf);
  int averageSamplingPercentage = 0;

  try {
    // 1. Add statistics from optimized segment
    Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
    FileSystem hdfs = FileSystem.get(conf);
    if (!hdfs.exists(statisticsDirPath)) {
      throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
    }

    if (!hdfs.isDirectory(statisticsDirPath)) {
      throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
    }

    Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
        BatchConstants.CFG_OUTPUT_STATISTICS);
    if (statisticsFiles == null) {
      throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
    }

    for (Path item : statisticsFiles) {
      CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
          optimizeSegment.getConfig(), item);
      averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
      addFromCubeStatsReader(optimizeSegmentStatsReader);
    }

    // 2. Add statistics from old segment
    CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
    averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
    addFromCubeStatsReader(oldSegmentStatsReader);

    logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());

    // 3. Store merged statistics for recommend cuboids
    averageSamplingPercentage = averageSamplingPercentage / 2;

    Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

    Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
    for (Long cuboid : cuboidsRecommend) {
      HLLCounter hll = cuboidHLLMap.get(cuboid);
      if (hll == null) {
        logger.warn("Cannot get the row count stats for cuboid " + cuboid);
      } else {
        resultCuboidHLLMap.put(cuboid, hll);
      }
    }

    String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
    CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
        averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

    try (FSDataInputStream mergedStats = hdfs
        .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
      // put the statistics to metadata store
      String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
      rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
    }

    // By default, the cube optimization will use in-memory cubing
    CubingJob cubingJob = (CubingJob) getManager()
        .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

    return new ExecuteResult();
  } catch (IOException e) {
    logger.error("fail to merge cuboid statistics", e);
    return ExecuteResult.createError(e);
  }
}
Example 16
Source File: DatePartition2.java From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {
  // 1. HDFS configuration
  String namenode_ip = "192.168.17.10";
  String hdfs = "hdfs://" + namenode_ip + ":9000";
  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", hdfs);
  conf.set("mapreduce.app-submission.cross-platform", "true");

  // 2. MapReduce job configuration
  String jobName = "DatePartition2";                  // job name
  Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(DatePartition2.class);            // job class to run
  job.setJar("export\\DatePartition2.jar");           // local jar package
  job.setMapperClass(DatePartition2Mapper.class);     // Mapper class
  job.setMapOutputKeyClass(Text.class);               // Mapper output key type
  job.setMapOutputValueClass(IntWritable.class);      // Mapper output value type
  job.setReducerClass(DatePartition2Reducer.class);   // Reducer class
  job.setOutputKeyClass(Text.class);                  // Reducer output key type
  job.setOutputValueClass(IntWritable.class);         // Reducer output value type
  job.setPartitionerClass(YearPartitioner.class);     // custom partitioner
  job.setNumReduceTasks(3);                           // number of reduce tasks; passed to Partitioner.getPartition() as the numPartitions argument

  // 3. job input and output paths
  String dataDir = "/expr/datecount/data";                 // input data directory
  String outputDir = "/expr/datecount/output_partition2";  // output directory
  Path inPath = new Path(hdfs + dataDir);
  Path outPath = new Path(hdfs + outputDir);
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outPath);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outPath)) {
    fs.delete(outPath, true);
  }

  // 4. run the job
  System.out.println("Job: " + jobName + " is running...");
  if (job.waitForCompletion(true)) {
    System.out.println("success!");
    System.exit(0);
  } else {
    System.out.println("failed!");
    System.exit(1);
  }
}
Example 17
Source File: FlowCount.java From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {
  // 1. HDFS configuration
  String namenode_ip = "192.168.17.10";
  String hdfs = "hdfs://" + namenode_ip + ":9000";
  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", hdfs);
  conf.set("mapreduce.app-submission.cross-platform", "true");

  // 2. MapReduce job configuration
  String jobName = "FlowCount";                    // job name
  Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(FlowCount.class);              // job class to run
  job.setJar("export\\FlowCount.jar");             // local jar package
  job.setMapperClass(FlowCountMapper.class);       // Mapper class
  job.setMapOutputKeyClass(Text.class);            // Mapper output key type
  job.setMapOutputValueClass(IntWritable.class);   // Mapper output value type
  job.setReducerClass(FlowCountReducer.class);     // Reducer class
  job.setOutputKeyClass(Text.class);               // Reducer output key type
  job.setOutputValueClass(IntWritable.class);      // Reducer output value type

  // 3. job input and output paths
  String dataDir = "/expr/weblog/data";            // input data directory
  String outputDir = "/expr/weblog/output1";       // output directory
  Path inPath = new Path(hdfs + dataDir);
  Path outPath = new Path(hdfs + outputDir);
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outPath);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outPath)) {
    fs.delete(outPath, true);
  }

  // 4. run the job
  System.out.println("Job: " + jobName + " is running...");
  if (job.waitForCompletion(true)) {
    System.out.println("success!");
    System.exit(0);
  } else {
    System.out.println("failed!");
    System.exit(1);
  }
}
Example 18
Source File: StramClient.java From attic-apex-core with Apache License 2.0
public void copyInitialState(Path origAppDir) throws IOException {
  // locate previous snapshot
  long copyStart = System.currentTimeMillis();
  String newAppDir = this.dag.assertAppPath();

  FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
  // read snapshot against new dependencies
  Object snapshot = recoveryHandler.restore();
  if (snapshot == null) {
    throw new IllegalArgumentException("No previous application state found in " + origAppDir);
  }
  InputStream logIs = recoveryHandler.getLog();

  // modify snapshot state to switch app id
  ((StreamingContainerManager.CheckpointState)snapshot).setApplicationId(this.dag, conf);
  Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

  FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
  // remove the path that was created by the storage agent during deserialization and replacement
  fs.delete(checkpointPath, true);

  // write snapshot to new location
  recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
  recoveryHandler.save(snapshot);
  OutputStream logOs = recoveryHandler.rotateLog();
  IOUtils.copy(logIs, logOs);
  logOs.flush();
  logOs.close();
  logIs.close();

  List<String> excludeDirs = Arrays.asList(LogicalPlan.SUBDIR_CHECKPOINTS, LogicalPlan.SUBDIR_EVENTS, LogicalPlan.SUBDIR_STATS);
  // copy sub directories that are not present in target
  FileStatus[] lFiles = fs.listStatus(origAppDir);

  // In case of MapR/MapR-FS, f.getPath().toString() returns path as maprfs:///<orig app dir>
  // whereas origAppDir.toString & newAppDir are in maprfs:/<orig or new app dir> format
  // e.g.
  // f.getPath().toString -> maprfs:///user/dtadmin/datatorrent/apps/application_1481890072066_0004/checkpoints
  // origAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0004
  // newAppDir -> maprfs:/user/dtadmin/datatorrent/apps/application_1481890072066_0005
  String origAppDirPath = Path.getPathWithoutSchemeAndAuthority(origAppDir).toString();
  String newAppDirPath = Path.getPathWithoutSchemeAndAuthority(new Path(newAppDir)).toString();

  for (FileStatus f : lFiles) {
    if (f.isDirectory() && !excludeDirs.contains(f.getPath().getName())) {
      String targetPath = f.getPath().toString().replace(origAppDirPath, newAppDirPath);
      if (!fs.exists(new Path(targetPath))) {
        LOG.debug("Copying {} size {} to {}", f.getPath(), f.getLen(), targetPath);
        long start = System.currentTimeMillis();
        FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
        LOG.debug("Copying {} to {} took {} ms", f.getPath(), f.getLen(), targetPath, System.currentTimeMillis() - start);
      } else {
        LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
      }
    }
  }

  LOG.info("Copying initial state took {} ms", System.currentTimeMillis() - copyStart);
}
Example 19
Source File: IPCount.java From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {
  // 1. HDFS configuration
  String namenode_ip = "192.168.17.10";
  String hdfs = "hdfs://" + namenode_ip + ":9000";
  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", hdfs);
  conf.set("mapreduce.app-submission.cross-platform", "true");

  // 2. MapReduce job configuration
  String jobName = "IPCount";                      // job name
  Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(IPCount.class);                // job class to run
  job.setJar("export\\IPCount.jar");               // local jar package
  job.setMapperClass(IPCountMapper.class);         // Mapper class
  job.setMapOutputKeyClass(DayAndIp.class);        // Mapper output key type
  job.setMapOutputValueClass(IntWritable.class);   // Mapper output value type
  job.setReducerClass(IPCountReducer.class);       // Reducer class
  job.setOutputKeyClass(DayAndIp.class);           // Reducer output key type
  job.setOutputValueClass(IntWritable.class);      // Reducer output value type

  // 3. job input and output paths
  String dataDir = "/expr/weblog/data";            // input data directory
  String outputDir = "/expr/weblog/output4";       // output directory
  Path inPath = new Path(hdfs + dataDir);
  Path outPath = new Path(hdfs + outputDir);
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outPath);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outPath)) {
    fs.delete(outPath, true);
  }

  // 4. run the job
  System.out.println("Job: " + jobName + " is running...");
  if (job.waitForCompletion(true)) {
    System.out.println("success!");
    System.exit(0);
  } else {
    System.out.println("failed!");
    System.exit(1);
  }
}
Example 20
Source File: LockUtil.java From anthelion with Apache License 2.0
/**
 * Remove lock file. NOTE: applications enforce the semantics of this file -
 * this method simply removes any file with a given name.
 * @param fs filesystem
 * @param lockFile lock file name
 * @return false, if the lock file doesn't exist. True, if it existed and was
 * successfully removed.
 * @throws IOException if lock file exists but it is a directory.
 */
public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
  if (!fs.exists(lockFile)) {
    return false;
  }
  if (fs.getFileStatus(lockFile).isDir()) {
    throw new IOException("lock file " + lockFile + " exists but is a directory!");
  }
  return fs.delete(lockFile, false);
}
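A closing note on the check-then-act pattern the examples above rely on: exists() is a separate call from the operation it guards, so another client can create or delete the path between the check and the action. Where that matters, one alternative is to skip the pre-check and handle the exception the operation itself raises, such as FileNotFoundException from getFileStatus() or open(). The sketch below is not taken from any of the projects above; the path and helper name are hypothetical and used only for illustration.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsAlternativeExample {

  /** Returns the file length, or -1 if the path is missing, without a separate exists() call. */
  static long lengthOrMinusOne(FileSystem fs, Path path) throws IOException {
    try {
      // one round trip instead of exists() followed by getFileStatus()
      FileStatus status = fs.getFileStatus(path);
      return status.getLen();
    } catch (FileNotFoundException e) {
      return -1L;
    }
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // hypothetical path used only for illustration
    System.out.println(lengthOrMinusOne(fs, new Path("/tmp/exists-demo/part-00000")));
  }
}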