Java Code Examples for org.apache.hadoop.mapred.JobConf#setBoolean()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setBoolean().
Each example is taken from an open-source project; the source file, project, and license are noted above it.
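Before the project examples, a minimal, self-contained sketch of the call itself may help: setBoolean(String, boolean) stores a flag under a configuration key, and getBoolean(String, boolean) reads it back with a default. The key "example.feature.enabled" below is a made-up name used only for illustration, not a real Hadoop property.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Store a boolean flag under a configuration key.
    // "example.feature.enabled" is a hypothetical key, not a real Hadoop property.
    conf.setBoolean("example.feature.enabled", true);

    // Read it back; the second argument is the default returned when the key is absent.
    boolean enabled = conf.getBoolean("example.feature.enabled", false);
    System.out.println("enabled = " + enabled); // prints: enabled = true
  }
}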
Example 1
Source File: TestFileSystem.java From RDFS with Apache License 2.0
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 2
Source File: LinkDbMerger.java From anthelion with Apache License 2.0
public static JobConf createMergeJob(Configuration config, Path linkDb,
    boolean normalize, boolean filter) {
  Path newLinkDb = new Path("linkdb-merge-" +
      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);

  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);

  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);

  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  return job;
}
Example 3
Source File: RegressionPrepare.java From ml-ease with Apache License 2.0
@Override
public void run() throws Exception {
  JobConfig config = super.getJobConfig();
  JobConf conf = super.createJobConf(RegressionPrepareMapper.class,
                                     RegressionPrepareOutput.SCHEMA$);
  String mapKey = config.getString(MAP_KEY, "");
  conf.set(MAP_KEY, mapKey);
  conf.setInt(NUM_CLICK_REPLICATES, config.getInt(NUM_CLICK_REPLICATES, 1));
  conf.setBoolean(IGNORE_FEATURE_VALUE, config.getBoolean(IGNORE_FEATURE_VALUE, false));
  int nblocks = config.getInt(NUM_BLOCKS, 0);
  conf.setInt(NUM_BLOCKS, nblocks);
  _logger.info("Running the preparation job of admm with map.key = " + mapKey
      + " and num.blocks=" + nblocks);
  AvroUtils.runAvroJob(conf);
}
Example 4
Source File: TestConfigTranslationMRToTez.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void testMRToTezKeyTranslation() {
  JobConf confVertex1 = new JobConf();
  confVertex1.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
  confVertex1.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, LongWritable.class.getName());
  confVertex1.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  MRHelpers.translateMRConfToTez(confVertex1);

  // Verify translation
  assertEquals(IntWritable.class.getName(),
      ConfigUtils.getIntermediateOutputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(),
      ConfigUtils.getIntermediateOutputValueClass(confVertex1).getName());
  assertEquals(IntWritable.class.getName(),
      ConfigUtils.getIntermediateInputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(),
      ConfigUtils.getIntermediateInputValueClass(confVertex1).getName());
  assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1));
  assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1));
}
Example 5
Source File: TestFetcher.java From hadoop with Apache License 2.0
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);

  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);

  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
Example 6
Source File: TestDeprecatedKeys.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
  assertEquals(0.22f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
Example 7
Source File: TestChild.java From big-c with Apache License 2.0
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds,
    boolean oldConfigs) throws IOException, InterruptedException,
    ClassNotFoundException {
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, TASK_OPTS_VAL);
  } else {
    conf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, MAP_OPTS_VAL);
    conf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, REDUCE_OPTS_VAL);
  }
  conf.set(JobConf.MAPRED_MAP_TASK_LOG_LEVEL, Level.OFF.toString());
  conf.set(JobConf.MAPRED_REDUCE_TASK_LOG_LEVEL, Level.OFF.toString());

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, numMaps, numReds);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  assertFalse("Job already has a job tracker connection, before it's submitted",
      job.isConnected());
  job.submit();
  assertTrue("Job doesn't have a job tracker connection, even though it's been submitted",
      job.isConnected());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // Check output directory
  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exist!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not " + numPartFiles,
      list.length == numPartFiles);
  return job;
}
Example 8
Source File: LinkRank.java From nutch-htmlunit with Apache License 2.0
/**
 * Runs the initializer job. The initializer job sets up the nodes with a
 * default starting score for link analysis.
 *
 * @param nodeDb The node database to use.
 * @param output The job output directory.
 *
 * @throws IOException If an error occurs while running the initializer job.
 */
private void runInitializer(Path nodeDb, Path output) throws IOException {
  // configure the initializer
  JobConf initializer = new NutchJob(getConf());
  initializer.setJobName("LinkAnalysis Initializer");
  FileInputFormat.addInputPath(initializer, nodeDb);
  FileOutputFormat.setOutputPath(initializer, output);
  initializer.setInputFormat(SequenceFileInputFormat.class);
  initializer.setMapperClass(Initializer.class);
  initializer.setMapOutputKeyClass(Text.class);
  initializer.setMapOutputValueClass(Node.class);
  initializer.setOutputKeyClass(Text.class);
  initializer.setOutputValueClass(Node.class);
  initializer.setOutputFormat(MapFileOutputFormat.class);
  initializer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the initializer
  LOG.info("Starting initialization job");
  try {
    JobClient.runJob(initializer);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished initialization job.");
}
Example 9
Source File: IndexingJob.java From nutch-htmlunit with Apache License 2.0
public void index(Path crawlDb, Path linkDb, List<Path> segments,
    boolean noCommit, boolean deleteGone, String params,
    boolean filter, boolean normalize) throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("Indexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("Indexer");

  LOG.info("Indexer: deleting gone documents: " + deleteGone);
  LOG.info("Indexer: URL filtering: " + filter);
  LOG.info("Indexer: URL normalizing: " + normalize);

  IndexWriters writers = new IndexWriters(getConf());
  LOG.info(writers.describe());

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
  // job.set(SolrConstants.SERVER_URL, solrUrl);

  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

  if (params != null) {
    job.set(IndexerMapReduce.INDEXER_PARAMS, params);
  }

  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
      new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    if (!noCommit) {
      writers.open(job, "commit");
      writers.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
        + TimingUtil.elapsedTime(start, end));
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
Example 10
Source File: GenericMRLoadJobCreator.java From RDFS with Apache License 2.0
public static JobConf createJob(String[] argv, boolean mapoutputCompressed,
    boolean outputCompressed) throws Exception {

  JobConf job = new JobConf();
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return null;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass("mapred.indirect.input.format", null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path sysdir = jClient.getSystemDir();
    Random r = new Random();
    Path indirInputFile = new Path(sysdir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set("mapred.indirect.input.file", indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        sysdir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class, SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDir()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  job.setCompressMapOutput(mapoutputCompressed);
  job.setBoolean("mapred.output.compress", outputCompressed);
  return job;
}
Example 11
Source File: TradesHdfsDataVerifier.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("TradesHdfsDataVerifier");

  String hdfsHomeDir = args[0];
  String url = args[1];
  String tableName = args[2];

  System.out.println("TradesHdfsDataVerifier.run() invoked with "
      + " hdfsHomeDir = " + hdfsHomeDir
      + " url = " + url
      + " tableName = " + tableName);

  // Job-specific params
  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(HdfsDataMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(TradesRow.class);

  conf.setReducerClass(HdfsDataReducer.class);
  conf.set(RowOutputFormat.OUTPUT_TABLE, tableName + "_HDFS");
  //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
  conf.set(RowOutputFormat.OUTPUT_URL, url);
  conf.setOutputFormat(RowOutputFormat.class);
  conf.setOutputKeyClass(Key.class);
  conf.setOutputValueClass(TradeOutputObject.class);

  StringBuffer aStr = new StringBuffer();
  aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
  aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
  aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
  aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
  System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

  FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));

  JobClient.runJob(conf);
  return 0;
}
Example 12
Source File: TopBusyAirportGemfirexd.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("Busy Airport Count");

  Path outputPath = new Path(args[0]);
  Path intermediateOutputPath = new Path(args[0] + "_int");
  String hdfsHomeDir = args[1];
  String tableName = args[2];

  outputPath.getFileSystem(conf).delete(outputPath, true);
  intermediateOutputPath.getFileSystem(conf).delete(intermediateOutputPath, true);

  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(SampleMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);

  conf.setReducerClass(SampleReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  FileOutputFormat.setOutputPath(conf, intermediateOutputPath);

  int rc = JobClient.runJob(conf).isSuccessful() ? 0 : 1;
  if (rc == 0) {
    JobConf topConf = new JobConf(getConf());
    topConf.setJobName("Top Busy Airport");

    String hdfsFS = topConf.get("fs.defaultFS");
    URI hdfsUri = URI.create(hdfsFS);
    hdfsUri.getHost();

    // Assume that SqlFire locator is running alongside the namenode
    topConf.set(RowOutputFormat.OUTPUT_URL,
        "jdbc:gemfirexd://" + hdfsUri.getHost() + ":1527");
    //topConf.set(ddGfxdOutputFormat.OUTPUT_SCHEMA, "APP");
    //topConf.set(GfxdOutputFormat.OUTPUT_TABLE, "BUSY_AIRPORT");
    topConf.set(RowOutputFormat.OUTPUT_TABLE, "APP.BUSY_AIRPORT");

    // Only run a single reducer
    topConf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(topConf, intermediateOutputPath);

    topConf.setInputFormat(TextInputFormat.class);
    topConf.setMapperClass(TopBusyAirportMapper.class);
    topConf.setMapOutputKeyClass(Text.class);
    topConf.setMapOutputValueClass(StringIntPair.class);

    topConf.setReducerClass(TopBusyAirportReducer.class);
    topConf.setOutputKeyClass(Key.class);
    topConf.setOutputValueClass(BusyAirportModel.class);
    topConf.setOutputFormat(RowOutputFormat.class);

    rc = JobClient.runJob(topConf).isSuccessful() ? 0 : 1;
  }
  return rc;
}
Example 13
Source File: VerifyHdfsDataUsingMR.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  // todo@lhughes -- why do we need this?
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("hdfsMapReduce");

  String hdfsHomeDir = args[0];
  String url = args[1];
  String tableName = args[2];

  System.out.println("VerifyHdfsData.run() invoked with "
      + " hdfsHomeDir = " + hdfsHomeDir
      + " url = " + url
      + " tableName = " + tableName);

  // Job-specific params
  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(HdfsDataMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(MyRow.class);

  conf.setReducerClass(HdfsDataReducer.class);
  conf.set(RowOutputFormat.OUTPUT_TABLE, "TRADE.HDFS_CUSTOMERS");
  //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
  conf.set(RowOutputFormat.OUTPUT_URL, url);
  conf.setOutputFormat(RowOutputFormat.class);
  conf.setOutputKeyClass(Key.class);
  conf.setOutputValueClass(DataObject.class);

  StringBuffer aStr = new StringBuffer();
  aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
  aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
  aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
  aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
  System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

  // not planning to use this, but I get an NPE without it
  FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));

  JobClient.runJob(conf);
  return 0;
}
Example 14
Source File: TestTaskAttempt.java From hadoop with Apache License 2.0
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(
      new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener,
      new Token(), new Credentials(), new SystemClock(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId, "Task got killed"));
  assertFalse(
      "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task",
      eventHandler.internalError);
}
Example 15
Source File: EventInputFormatTest.java From gemfirexd-oss with Apache License 2.0
private void doTestRowSerDe(boolean concurrencyChecks) throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  final long statTS = System.currentTimeMillis();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" +
      HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  String concurrency = "persistent ENABLE CONCURRENCY CHECKS";
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) " +
      "partition by primary key buckets 1 hdfsstore (myhdfs) " +
      (concurrencyChecks ? concurrency : ""));

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  ps.setInt(1, 1);
  ps.setString(2, "Value-1");
  ps.execute();

  // Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);

  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);

  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  assertTrue(rr.next(key, value));
  assertEquals(1, value.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", value.getRowAsResultSet().getString(2));
  assertTrue(value.getTimestamp() > statTS);
  assertFalse(value.getRowAsResultSet().next());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream dos = new DataOutputStream(baos);
  value.write(dos);
  dos.close();

  byte[] buf = baos.toByteArray();
  DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
  Row row = new Row();
  row.readFields(dis);
  dis.close();

  assertEquals(1, row.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", row.getRowAsResultSet().getString(2));
  assertFalse(value.getRowAsResultSet().next());
  TestUtil.shutDown();
}
Example 16
Source File: SolrIndexer.java From anthelion with Apache License 2.0
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
    List<Path> segments, boolean noCommit, boolean deleteGone,
    String solrParams, boolean filter, boolean normalize) throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
  LOG.info("SolrIndexer: URL filtering: " + filter);
  LOG.info("SolrIndexer: URL normalizing: " + normalize);

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

  if (solrParams != null) {
    job.set(SolrConstants.PARAMS, solrParams);
  }

  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);
  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
      new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
    if (!noCommit) {
      solr.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
        + TimingUtil.elapsedTime(start, end));
  } catch (Exception e) {
    LOG.error(e.toString());
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
Example 17
Source File: Submitter.java From RDFS with Apache License 2.0
/**
 * Set whether to keep the command file for debugging
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(JobConf conf, boolean keep) {
  conf.setBoolean("hadoop.pipes.command-file.keep", keep);
}
Example 18
Source File: Submitter.java From hadoop-gpu with Apache License 2.0
/**
 * Set whether the job will use a Java RecordWriter.
 * @param conf the configuration to modify
 * @param value the new value to set
 */
public static void setIsJavaRecordWriter(JobConf conf, boolean value) {
  conf.setBoolean("hadoop.pipes.java.recordwriter", value);
}
Example 19
Source File: Submitter.java From big-c with Apache License 2.0
/**
 * Set whether the Mapper is written in Java.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaMapper(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_MAP, value);
}
Example 20
Source File: Submitter.java From hadoop with Apache License 2.0
/**
 * Set whether the job is using a Java RecordReader.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaRecordReader(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_RR, value);
}
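The four Submitter examples above are thin wrappers around JobConf#setBoolean(). As a closing illustration, here is a minimal sketch of how they might be combined when configuring a Pipes job that runs a native mapper with Java-side record I/O. It assumes the Apache Hadoop org.apache.hadoop.mapred.pipes.Submitter class, where all four setters live together; the class name PipesConfSketch is made up for this example.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.pipes.Submitter;

public class PipesConfSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Java classes handle record reading and writing...
    Submitter.setIsJavaRecordReader(conf, true);
    Submitter.setIsJavaRecordWriter(conf, true);

    // ...while the map logic itself is an external Pipes binary.
    Submitter.setIsJavaMapper(conf, false);

    // Keep the generated command file on disk for debugging.
    Submitter.setKeepCommandFile(conf, true);
  }
}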