org.apache.hadoop.mapred.FileAlreadyExistsException Java Exaples

Source File: FileOutputFormat.java From big-c with Apache License 2.0

6 votes

public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, job.getConfiguration());

  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}

Source File: FileOutputFormat.java From hadoop with Apache License 2.0

6 votes

public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, job.getConfiguration());

  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}

Source File: ZephyrOutputFormat.java From zephyr with Apache License 2.0

6 votes

@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws FileAlreadyExistsException, InvalidJobConfException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null && job.getNumReduceTasks() != 0) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    if (outDir != null) {
        FileSystem fs = outDir.getFileSystem(job);
        // normalize the output directory
        outDir = fs.makeQualified(outDir);
        setOutputPath(job, outDir);

        // get delegation token for the outDir's file system
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job);
        String jobUuid = job.get("zephyr.job.uuid");
        if (jobUuid == null)
            throw new InvalidJobConfException("This output format REQUIRES the value zephyr.job.uuid to be specified in the job configuration!");
        // // check its existence
        // if (fs.exists(outDir)) {
        // throw new FileAlreadyExistsException("Output directory " + outDir
        // + " already exists");
        // }
    }
}

Source File: NativeSparkDataSet.java From spliceengine with GNU Affero General Public License v3.0

6 votes

/**
 * Overridden to avoid throwing an exception if the specified directory
 * for export already exists.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    Path outDir = getOutputPath(job);
    if(outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    } else {
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job.getConfiguration());
        /*
        if(outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
            System.out.println("Output dir already exists, no problem");
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
        */
    }
}

Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0

6 votes

/**
 * Overridden to avoid throwing an exception if the specified directory
 * for export already exists.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    Path outDir = getOutputPath(job);
    if(outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    } else {
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job.getConfiguration());
        /*
        if(outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
            System.out.println("Output dir already exists, no problem");
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
        */
    }
}

Source File: TeraOutputFormat.java From hadoop with Apache License 2.0

5 votes

@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  if (fs.exists(outDir)) {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  }
}

Source File: TeraOutputFormat.java From pravega-samples with Apache License 2.0

5 votes

@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  try {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  } catch (FileNotFoundException ignored) {
  }
}

Source File: TeraOutputFormat.java From big-c with Apache License 2.0

5 votes

@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  if (fs.exists(outDir)) {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  }
}

Source File: ForwardingBigQueryFileOutputFormat.java From hadoop-connectors with Apache License 2.0

5 votes

/**
 * Checks to make sure the configuration is valid, the output path doesn't already exist, and that
 * a connection to BigQuery can be established.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
  Configuration conf = job.getConfiguration();

  // Validate the output configuration.
  BigQueryOutputConfiguration.validateConfiguration(conf);

  // Get the output path.
  Path outputPath = BigQueryOutputConfiguration.getGcsOutputPath(conf);
  logger.atInfo().log("Using output path '%s'.", outputPath);

  // Error if the output path already exists.
  FileSystem outputFileSystem = outputPath.getFileSystem(conf);
  if (outputFileSystem.exists(outputPath)) {
    throw new IOException("The output path '" + outputPath + "' already exists.");
  }

  // Error if compression is set as there's mixed support in BigQuery.
  if (FileOutputFormat.getCompressOutput(job)) {
    throw new IOException("Compression isn't supported for this OutputFormat.");
  }

  // Error if unable to create a BigQuery helper.
  try {
    new BigQueryFactory().getBigQueryHelper(conf);
  } catch (GeneralSecurityException gse) {
    throw new IOException("Failed to create BigQuery client", gse);
  }

  // Let delegate process its checks.
  getDelegate(conf).checkOutputSpecs(job);
}

Source File: PigOutputFormat.java From spork with Apache License 2.0

5 votes

private void checkOutputSpecsHelper(List<POStore> stores, JobContext
        jobcontext) throws IOException, InterruptedException {
    for (POStore store : stores) {
        // make a copy of the original JobContext so that
        // each OutputFormat get a different copy
        JobContext jobContextCopy = HadoopShims.createJobContext(
                jobcontext.getConfiguration(), jobcontext.getJobID());

        // set output location
        PigOutputFormat.setLocation(jobContextCopy, store);

        StoreFuncInterface sFunc = store.getStoreFunc();
        OutputFormat of = sFunc.getOutputFormat();

        // The above call should have update the conf in the JobContext
        // to have the output location - now call checkOutputSpecs()
        try {
            of.checkOutputSpecs(jobContextCopy);
        } catch (IOException ioe) {
            boolean shouldThrowException = true;
            if (sFunc instanceof OverwritableStoreFunc) {
                if (((OverwritableStoreFunc) sFunc).shouldOverwrite()) {
                    if (ioe instanceof FileAlreadyExistsException
                            || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException) {
                        shouldThrowException = false;
                    }
                }
            }
            if (shouldThrowException)
                throw ioe;
        }
    }
}

Source File: TestNativeMapReduce.java From spork with Apache License 2.0

5 votes

@Test
public void testNativeMRJobSimpleFailure() throws Exception{
    try{
        //test if correct return code is obtained when query fails
        // the native MR is writing to an exisiting and should fail

        Collection<String> results = new HashSet<String>();
        results.add("(one,1)");
        results.add("(two,2)");
        results.add("(three,3)");

        pigServer.setBatchOn();
        pigServer.registerQuery("A = load '" + INPUT_FILE + "';");
        pigServer.registerQuery("B = mapreduce '" + jarFileName + "' " +
                "Store A into 'table_testNativeMRJobSimple_input' "+
                "Load 'table_testNativeMRJobSimple_output' "+
        "`org.apache.pig.test.utils.WordCount table_testNativeMRJobSimple_input " + INPUT_FILE + "`;");
        pigServer.registerQuery("Store B into 'table_testNativeMRJobSimpleDir';");
        pigServer.executeBatch();

        assertTrue("job failed", PigStats.get().getReturnCode() != 0);

    } catch (JobCreationException e) {
        // Running in Tez mode throw exception
        assertTrue(e.getCause() instanceof FileAlreadyExistsException);
    }
    finally{
        // We have to manually delete intermediate mapreduce files
        Util.deleteFile(cluster, "table_testNativeMRJobSimple_input");
        Util.deleteFile(cluster, "table_testNativeMRJobSimpleDir");
    }
}

Source File: FileOutputFormat.java From hadoop-gpu with Apache License 2.0

5 votes

public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }
  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}

Source File: CsvBulkLoadToolIT.java From phoenix with Apache License 2.0

5 votes

@Test
public void testAlreadyExistsOutputPath() {
    String tableName = "TABLE9";
    String outputPath = "/tmp/output/tabl9";
    try {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        fs.create(new Path(outputPath));
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        printWriter.println("1,FirstName 1,LastName 1");
        printWriter.println("2,FirstName 2,LastName 2");
        printWriter.close();
        
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input9.csv",
            "--output", outputPath,
            "--table", tableName,
            "--zookeeper", zkQuorum });
        
        fail(String.format("Output path %s already exists. hence, should fail",outputPath));
    } catch (Exception ex) {
        assertTrue(ex instanceof FileAlreadyExistsException); 
    }
}

Source File: RegexBulkLoadToolIT.java From phoenix with Apache License 2.0

5 votes

@Test
public void testAlreadyExistsOutputPath() {
    String tableName = "TABLE9";
    String outputPath = "/tmp/output/tabl9";
    try {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        fs.create(new Path(outputPath));
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        printWriter.println("1,FirstName 1,LastName 1");
        printWriter.println("2,FirstName 2,LastName 2");
        printWriter.close();
        
        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input9.csv",
            "--output", outputPath,
            "--table", tableName,
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum });
        
        fail(String.format("Output path %s already exists. hence, should fail",outputPath));
    } catch (Exception ex) {
        assertTrue(ex instanceof FileAlreadyExistsException); 
    }
}

Source File: StreamJob.java From hadoop-gpu with Apache License 2.0

4 votes

public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();

      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
      LOG.info("Running job: " + jobId_);
      jobInfo();

      while (!running_.isComplete()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
        }
        running_ = jc_.getJob(jobId_);
        String report = null;
        report = " map " + Math.round(running_.mapProgress() * 100) + "%  reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";

        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
      }
      if (!running_.isSuccessful()) {
        jobInfo();
	LOG.error("Job not Successful!");
	return 1;
      }
      LOG.info("Job complete: " + jobId_);
      LOG.info("Output: " + output_);
      error = false;
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : " 
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } finally {
      if (error && (running_ != null)) {
        LOG.info("killJob...");
        running_.killJob();
      }
      jc_.close();
    }
    return 0;
  }

Source File: PerMapOutputFormat.java From webarchive-commons with Apache License 2.0

4 votes

/**
 * Over-ride the default FileOutputFormat's checkOutputSpecs() to
 * allow for the target directory to already exist.
 */
public void checkOutputSpecs( FileSystem ignored, JobConf job )
  throws FileAlreadyExistsException, InvalidJobConfException, IOException 
{
}

Source File: SSTableIndexOutputFormat.java From hadoop-sstable with Apache License 2.0

4 votes

@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
}

Source File: TestPigContext.java From spork with Apache License 2.0

4 votes

@Test
public void testHadoopExceptionCreation() throws Exception {
    Object object = PigContext
            .instantiateFuncFromSpec("org.apache.hadoop.mapred.FileAlreadyExistsException");
    assertTrue(object instanceof FileAlreadyExistsException);
}

org.apache.hadoop.mapred.FileAlreadyExistsException Java Examples