Java Code Examples for org.apache.hadoop.mapred.JobClient#submitJob()
The following examples show how to use org.apache.hadoop.mapred.JobClient#submitJob(). Each example is taken from an open-source project; the source file and license are noted above each snippet.
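Before the examples, a minimal sketch of the pattern most of them share: unlike JobClient.runJob(), which blocks until the job finishes, submitJob() returns a RunningJob handle immediately and leaves monitoring to the caller. The enclosing class and method below are illustrative placeholders, not code from any of the projects on this page.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitJobSketch {

  /** Submits an already-configured JobConf and polls until the job completes. */
  public static boolean submitAndWait(JobConf jobConf)
      throws IOException, InterruptedException {
    JobClient jobClient = new JobClient(jobConf);
    try {
      // submitJob() returns right away; the job runs asynchronously on the cluster.
      RunningJob running = jobClient.submitJob(jobConf);
      while (!running.isComplete()) {   // poll until the cluster reports completion
        Thread.sleep(1000);
      }
      return running.isSuccessful();
    } finally {
      jobClient.close();
    }
  }
}

The examples below follow this same submit-then-monitor shape, differing mainly in how they monitor (polling isComplete(), calling waitForCompletion(), or using JobClient.monitorAndPrintJob()).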
Example 1
Source File: TestMiniMRProxyUser.java From big-c with Apache License 2.0
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
Example 2
Source File: TestMiniMRProxyUser.java From hadoop with Apache License 2.0
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
Example 3
Source File: DataJoinJob.java From hadoop-gpu with Apache License 2.0
/**
 * Submit/run a map/reduce job.
 *
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}
Example 4
Source File: DataJoinJob.java From hadoop with Apache License 2.0
/**
 * Submit/run a map/reduce job.
 *
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}
Example 5
Source File: TestEncryptedShuffle.java From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts) throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir = KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf, useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());

    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 6
Source File: HadoopAlgoRunner.java From mr4c with Apache License 2.0
private void submitJob() throws IOException {
  // most of this method copies JobClient.runJob()
  // addition here is logging the job URI
  JobClient client = new JobClient(m_jobConf);
  RunningJob job = client.submitJob(m_jobConf);
  m_log.info("Job URL is [{}]", job.getTrackingURL());
  try {
    if (!client.monitorAndPrintJob(m_jobConf, job)) {
      throw new IOException("Job failed!");
    }
  } catch (InterruptedException ie) {
    Thread.currentThread().interrupt();
  }
}
Example 7
Source File: TestEncryptedShuffle.java From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts) throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir = KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf, useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());

    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 8
Source File: TestMROldApiJobs.java From hadoop with Apache License 2.0
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds)
    throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n"
      + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Example 9
Source File: StreamJob.java From hadoop-gpu with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    StreamUtil.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  boolean error = true;
  running_ = null;
  String lastReport = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();

    LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
    LOG.info("Running job: " + jobId_);
    jobInfo();

    while (!running_.isComplete()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
      }
      running_ = jc_.getJob(jobId_);
      String report = null;
      report = " map " + Math.round(running_.mapProgress() * 100)
          + "% reduce " + Math.round(running_.reduceProgress() * 100) + "%";

      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
    }
    if (!running_.isSuccessful()) {
      jobInfo();
      LOG.error("Job not Successful!");
      return 1;
    }
    LOG.info("Job complete: " + jobId_);
    LOG.info("Output: " + output_);
    error = false;
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job , bad input path : " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error Launching job : " + ioe.getMessage());
    return 5;
  } finally {
    if (error && (running_ != null)) {
      LOG.info("killJob...");
      running_.killJob();
    }
    jc_.close();
  }
  return 0;
}
Example 10
Source File: StreamJob.java From hadoop with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    RunJar.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  running_ = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();
    if (background_) {
      LOG.info("Job is running in background.");
    } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
      LOG.error("Job not successful!");
      return 1;
    }
    LOG.info("Output directory: " + output_);
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job , bad input path : " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error Launching job : " + ioe.getMessage());
    return 5;
  } catch (InterruptedException ie) {
    LOG.error("Error monitoring job : " + ie.getMessage());
    return 6;
  } finally {
    jc_.close();
  }
  return 0;
}
Example 11
Source File: TestMRAppWithCombiner.java From hadoop with Apache License 2.0
static boolean runJob(JobConf conf) throws Exception {
  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Example 12
Source File: TestMRAppWithCombiner.java From big-c with Apache License 2.0
static boolean runJob(JobConf conf) throws Exception {
  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Example 13
Source File: TestKeyFieldBasedComparator.java From hadoop-gpu with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path("build/test/test.mapred.spill");
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(2);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set("map.output.key.field.separator", " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }

  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir, new OutputLogFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines (both the lines must end up in the same
    // reducer since the partitioner takes the same key spec for all
    // lines)
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 14
Source File: StreamJob.java From big-c with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    RunJar.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  running_ = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();
    if (background_) {
      LOG.info("Job is running in background.");
    } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) {
      LOG.error("Job not successful!");
      return 1;
    }
    LOG.info("Output directory: " + output_);
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job , bad input path : " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error Launching job : " + ioe.getMessage());
    return 5;
  } catch (InterruptedException ie) {
    LOG.error("Error monitoring job : " + ie.getMessage());
    return 6;
  } finally {
    jc_.close();
  }
  return 0;
}
Example 15
Source File: JobRunner.java From tracing-framework with BSD 3-Clause "New" or "Revised" License
private void runJobContinuously() throws IOException, InterruptedException {
  System.out.printf("Starting MapReduce %s runner for tenant %d\n",
      job.getClass().getSimpleName(), tenantClass);

  this.status = "Configuring";

  // Set the HDFS variables in the config
  Configuration conf = new Configuration();
  conf.set("yarn.resourcemanager.hostname",
      ConfigFactory.load().getString("mapreduce-generator.yarn-resourcemanager-hostname"));
  conf.set("mapreduce.framework.name", "yarn");
  conf.set("fs.defaultFS",
      ConfigFactory.load().getString("mapreduce-generator.hdfs-namenode-url"));

  // Create a job config and get the job to populate it
  JobConf jobconf = new JobConf(conf);
  job.configure(jobconf);

  // Populate the input data if needed
  this.status = "Initializing input data";
  FileSystem fs = FileSystem.get(conf);
  job.initialize(fs);

  // Now start running the job in a loop
  while (!Thread.currentThread().isInterrupted()) {
    this.status = "Cleaning up Job #" + count;

    // Clear any previous xtrace context
    Baggage.stop();

    // Clean up previous output if necessary
    job.teardown(fs);

    // Set the xtrace metadata for the new job
    boolean sampled = numToSample.getAndDecrement() > 0;
    if (sampled) {
      XTrace.startTask(true);
      xtrace.log("Starting job");
    } else {
      numToSample.getAndIncrement();
    }
    Retro.setTenant(tenantClass);

    // Run the job
    try {
      this.status = "Running Job #" + (count++);
      JobClient jc = new JobClient(jobconf);
      rj = jc.submitJob(jobconf);
      rj.waitForCompletion();
      System.out.println("Job Complete");
      System.out.println(rj);
      rj = null;
    } finally {
      // Log the end of the job and clear the metadata
      if (sampled)
        xtrace.log("Job complete");
      Baggage.stop();
    }
  }

  throw new InterruptedException("JobRunner interrupted");
}
Example 16
Source File: TestKeyFieldBasedComparator.java From hadoop with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }

  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 17
Source File: StreamJob.java From RDFS with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    StreamUtil.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  boolean error = true;
  running_ = null;
  String lastReport = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();

    LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
    LOG.info("Running job: " + jobId_);
    jobInfo();

    while (!running_.isComplete()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
      }
      running_ = jc_.getJob(jobId_);
      String report = null;
      report = " map " + Math.round(running_.mapProgress() * 100)
          + "% reduce " + Math.round(running_.reduceProgress() * 100) + "%";

      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
    }
    if (!running_.isSuccessful()) {
      jobInfo();
      LOG.error("Job not Successful!");
      return 1;
    }
    LOG.info("Job complete: " + jobId_);
    LOG.info("Output: " + output_);
    error = false;
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job , bad input path : " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error Launching job : " + ioe.getMessage());
    return 5;
  } finally {
    if (error && (running_ != null)) {
      LOG.info("killJob...");
      running_.killJob();
    }
    jc_.close();
  }
  return 0;
}
Example 18
Source File: TestKeyFieldBasedComparator.java From RDFS with Apache License 2.0
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path("build/test/test.mapred.spill");
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(2);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set("map.output.key.field.separator", " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }

  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines (both the lines must end up in the same
    // reducer since the partitioner takes the same key spec for all
    // lines)
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 19
Source File: StreamJob.java From RDFS with Apache License 2.0
public int submitAndMonitorJob() throws IOException {

  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    StreamUtil.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  boolean error = true;
  running_ = null;
  String lastReport = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getJobID();

    LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
    LOG.info("Running job: " + jobId_);
    jobInfo();

    while (!running_.isComplete()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
      }
      running_ = jc_.getJob(jobId_);
      String report = null;
      report = " map " + Math.round(running_.mapProgress() * 100)
          + "% reduce " + Math.round(running_.reduceProgress() * 100) + "%";

      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
    }
    if (!running_.isSuccessful()) {
      jobInfo();
      LOG.error("Job not Successful!");
      return 1;
    }
    LOG.info("Job complete: " + jobId_);
    LOG.info("Output: " + output_);
    error = false;
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job , bad input path : " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job , Output path already exists : " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error Launching job : " + ioe.getMessage());
    return 5;
  } finally {
    if (error && (running_ != null)) {
      LOG.info("killJob...");
      running_.killJob();
    }
    jc_.close();
  }
  return 0;
}
Example 20
Source File: DataFsck.java From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException {
  boolean done = false;
  JobClient jClient = new JobClient(createJobConf());
  List<JobContext> submitted = new ArrayList<JobContext>();
  Random rand = new Random();
  do {
    JobConf jobConf = createJobConf();
    final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
    Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDir.toString());
    Path log = new Path(jobDir, "_logs");
    FileOutputFormat.setOutputPath(jobConf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobDir.getFileSystem(jobConf);
    Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
    jobConf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;
    try {
      opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class,
          Text.class, SequenceFile.CompressionType.NONE);
      String f = null;
      do {
        f = inputReader.readLine();
        if (f == null) {
          done = true;
          break;
        }
        opWriter.append(new Text(f), new Text(f));
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      } while (opCount < filesPerJob);
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    jobConf.setInt(OP_COUNT_LABEL, opCount);

    RunningJob rJob = jClient.submitJob(jobConf);
    JobContext ctx = new JobContext(rJob, jobConf);
    submitted.add(ctx);
  } while (!done);
  return submitted;
}