Java Code Examples for org.apache.hadoop.mapred.JobConf#setInt()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setInt().
Each example is drawn from an open-source project; the source file and license are noted above it.
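Before the project examples, here is a minimal sketch of the method itself: JobConf#setInt(String, int) stores an integer under a configuration key, and the matching getInt(String, int) reads it back, returning the supplied default when the key is unset. The key names and default values below are illustrative only, not taken from any of the projects.

import org.apache.hadoop.mapred.JobConf;

public class SetIntSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Store an int under an application-defined key (hypothetical name).
    job.setInt("example.max.retries", 5);
    // Read it back; the second argument is the default if the key is absent.
    int retries = job.getInt("example.max.retries", 1);
    int missing = job.getInt("example.not.set", 1); // returns the default, 1
    System.out.println(retries + " " + missing);
  }
}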
Example 1
Source File: TestDatamerge.java From hadoop with Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose(jointype, SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example 2
Source File: TestMiniCoronaRunJob.java From RDFS with Apache License 2.0
public void testMemoryLimit() throws Exception {
  LOG.info("Starting testMemoryLimit");
  JobConf conf = new JobConf();
  conf.setInt(CoronaConf.NODE_RESERVED_MEMORY_MB, Integer.MAX_VALUE);
  corona = new MiniCoronaCluster.Builder().conf(conf).numTaskTrackers(2).build();
  final JobConf jobConf = corona.createJobConf();
  long start = System.currentTimeMillis();
  FutureTask<Boolean> task = submitSleepJobFutureTask(jobConf);
  checkTaskNotDone(task, 10);
  NodeManager nm = corona.getClusterManager().getNodeManager();
  nm.getResourceLimit().setNodeReservedMemoryMB(0);
  Assert.assertTrue(task.get());
  long end = System.currentTimeMillis();
  LOG.info("Task Done. Verifying");
  new ClusterManagerMetricsVerifier(corona.getClusterManager(),
      1, 1, 1, 1, 1, 1, 0, 0).verifyAll();
  LOG.info("Time spent for testMemoryLimit:" + (end - start));
}
Example 3
Source File: MRTask.java From incubator-tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);

  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf,
          ((FileOutputCommitter) committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
Example 4
Source File: TeraSort.java From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
      "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
Example 5
Source File: MRTask.java From tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);

  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf,
          ((FileOutputCommitter) committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
Example 6
Source File: TestDeprecatedKeys.java From incubator-tez with Apache License 2.0
@Test
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

  MRHelpers.translateVertexConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000, jobConf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
  assertEquals(0.22f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT, 0), 0.01f);
}
Example 7
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws Exception {
  JobConf conf = HdpBootstrap.hadoopConfig();
  conf.setInputFormat(SplittableTextInputFormat.class);
  conf.setOutputFormat(EsOutputFormat.class);
  conf.setReducerClass(IdentityReducer.class);
  HadoopCfgUtils.setGenericOptions(conf);
  conf.setNumMapTasks(2);
  conf.setInt("actual.splits", 2);
  conf.setNumReduceTasks(0);

  JobConf standard = new JobConf(conf);
  standard.setMapperClass(TabMapper.class);
  standard.setMapOutputValueClass(LinkedMapWritable.class);
  standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
  FileInputFormat.setInputPaths(standard,
      new Path(MRSuite.testData.sampleArtistsDat(conf)));

  JobConf json = new JobConf(conf);
  json.setMapperClass(IdentityMapper.class);
  json.setMapOutputValueClass(Text.class);
  json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
  FileInputFormat.setInputPaths(json,
      new Path(MRSuite.testData.sampleArtistsJson(conf)));

  return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example 8
Source File: ValueAggregatorJob.java From hadoop with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job,
    Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  // specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i,
        "UserDefined," + descriptors[i].getName());
  }
}
Example 9
Source File: TeraSort.java From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
      "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float) (endTime - startTime) / 1000);
  LOG.info("done");
  return 0;
}
Example 10
Source File: TestMRHelpers.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void testContainerResourceConstruction() {
  JobConf conf = new JobConf(new Configuration());
  Resource mapResource = MRHelpers.getResourceForMRMapper(conf);
  Resource reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_CPU_VCORES,
      mapResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB,
      mapResource.getMemory());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES,
      reduceResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
      reduceResource.getMemory());

  conf.setInt(MRJobConfig.MAP_CPU_VCORES, 2);
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 123);
  conf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 20);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 1234);
  mapResource = MRHelpers.getResourceForMRMapper(conf);
  reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(2, mapResource.getVirtualCores());
  Assert.assertEquals(123, mapResource.getMemory());
  Assert.assertEquals(20, reduceResource.getVirtualCores());
  Assert.assertEquals(1234, reduceResource.getMemory());
}
Example 11
Source File: TestEncryptedShuffle.java From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
    throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
        KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
        useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();
    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 12
Source File: TestEncryptedShuffle.java From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
    throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
        KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
        useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();
    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 13
Source File: TestKeyFieldBasedPartitioner.java From hadoop with Apache License 2.0
/**
 * Test that key-field-based partitioning works with an empty key.
 */
@Test
public void testEmptyKey() throws Exception {
  KeyFieldBasedPartitioner<Text, Text> kfbp =
      new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.setInt("num.key.fields.for.partition", 10);
  kfbp.configure(conf);
  assertEquals("Empty key should map to 0th partition",
      0, kfbp.getPartition(new Text(), new Text(), 10));
}
Example 14
Source File: MneMapredBufferDataTest.java From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  m_partfns = new ArrayList<String>();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_conf.setInt(JobContext.TASK_PARTITION, TASK_PARTITION);

  MneConfigHelper.setDir(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "mapred-buffer-data");

  MneConfigHelper.setMemServiceName(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
      new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});

  MneConfigHelper.setMemServiceName(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
      1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
      new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
}
Example 15
Source File: ValueAggregatorJob.java From RDFS with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job,
    Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  // specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i,
        "UserDefined," + descriptors[i].getName());
  }
}
Example 16
Source File: TestMapProcessor.java From tez with Apache License 2.0
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");
  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                  .build().toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
      1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(
      localFs, workDir, jobConf, 0, new Path(workDir, "map0"),
      new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f)
        progressUpdate = prog;
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}
Example 17
Source File: TestReduceTaskFetchFail.java From RDFS with Apache License 2.0
@SuppressWarnings("deprecation") @Test public void testcheckAndInformJobTracker() throws Exception { //mock creation TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class); TaskReporter mockTaskReporter = mock(TaskReporter.class); JobConf conf = new JobConf(); conf.setUser("testuser"); conf.setJobName("testJob"); conf.setSessionId("testSession"); TaskAttemptID tid = new TaskAttemptID(); TestReduceTask rTask = new TestReduceTask(); rTask.setConf(conf); ReduceTask.ReduceCopier reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(1, tid, false); verify(mockTaskReporter, never()).progress(); reduceCopier.checkAndInformJobTracker(10, tid, false); verify(mockTaskReporter, times(1)).progress(); // Test the config setting conf.setInt("mapreduce.reduce.shuffle.maxfetchfailures", 3); rTask.setConf(conf); reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(1, tid, false); verify(mockTaskReporter, times(1)).progress(); reduceCopier.checkAndInformJobTracker(3, tid, false); verify(mockTaskReporter, times(2)).progress(); reduceCopier.checkAndInformJobTracker(5, tid, false); verify(mockTaskReporter, times(2)).progress(); reduceCopier.checkAndInformJobTracker(6, tid, false); verify(mockTaskReporter, times(3)).progress(); // test readError and its config reduceCopier.checkAndInformJobTracker(7, tid, true); verify(mockTaskReporter, times(4)).progress(); conf.setBoolean("mapreduce.reduce.shuffle.notify.readerror", false); rTask.setConf(conf); reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(7, tid, true); verify(mockTaskReporter, times(4)).progress(); }
Example 18
Source File: DataFsck.java From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException { boolean done = false; JobClient jClient = new JobClient(createJobConf()); List<JobContext> submitted = new ArrayList<JobContext>(); Random rand = new Random(); do { JobConf jobConf = createJobConf(); final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36); Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId); jobConf.set(JOB_DIR_LABEL, jobDir.toString()); Path log = new Path(jobDir, "_logs"); FileOutputFormat.setOutputPath(jobConf, log); LOG.info("log=" + log); // create operation list FileSystem fs = jobDir.getFileSystem(jobConf); Path opList = new Path(jobDir, "_" + OP_LIST_LABEL); jobConf.set(OP_LIST_LABEL, opList.toString()); int opCount = 0, synCount = 0; SequenceFile.Writer opWriter = null; try { opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class, Text.class, SequenceFile.CompressionType.NONE); String f = null; do { f = inputReader.readLine(); if (f == null) { done = true; break; } opWriter.append(new Text(f), new Text(f)); opCount++; if (++synCount > SYNC_FILE_MAX) { opWriter.sync(); synCount = 0; } } while (opCount < filesPerJob); } finally { if (opWriter != null) { opWriter.close(); } fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file } jobConf.setInt(OP_COUNT_LABEL, opCount); RunningJob rJob = jClient.submitJob(jobConf); JobContext ctx = new JobContext(rJob, jobConf); submitted.add(ctx); } while (!done); return submitted; }
Example 19
Source File: TableMapReduceUtil.java From hbase with Apache License 2.0
/**
 * Sets the number of rows to return and cache with each scanner iteration.
 * Higher caching values will enable faster mapreduce jobs at the expense of
 * requiring more heap to contain the cached rows.
 *
 * @param job The current job configuration to adjust.
 * @param batchSize The number of rows to return in batch with each scanner
 *   iteration.
 */
public static void setScannerCaching(JobConf job, int batchSize) {
  job.setInt("hbase.client.scanner.caching", batchSize);
}
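As a hedged call-site sketch of the helper above (the job class name and the caching value of 500 are hypothetical, not from the source):

JobConf job = new JobConf(HBaseConfiguration.create(), MyTableJob.class); // MyTableJob is hypothetical
// Cache 500 rows per scanner RPC: fewer round trips at the cost of more client heap.
TableMapReduceUtil.setScannerCaching(job, 500);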
Example 20
Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
  System.out.println("TeraSort ===============================================================");

  getFileSystem().delete(new Path(sortOutDir), true);

  final JobConf jobConf = new JobConf();

  jobConf.setUser(getUser());
  jobConf.set("fs.defaultFS", getFsBase());

  log().info("Desired number of reduces: " + numReduces());
  jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

  log().info("Desired number of maps: " + numMaps());
  final long splitSize = dataSizeBytes() / numMaps();
  log().info("Desired split size: " + splitSize);

  // Force the split to be of the desired size:
  jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
  jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

  jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
  jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

  if (gzip)
    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

  jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
      TextPartiallyRawComparator.class.getName());

  Job job = setupConfig(jobConf);

  HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId,
      createJobInfo(job.getConfiguration(), null));

  fut.get();
}