org.apache.hadoop.mapreduce.lib.output.NullOutputFormat Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.lib.output.NullOutputFormat, an OutputFormat that writes nothing: every record sent to it is discarded, and no output files or directories are created. It is the usual choice for jobs that do their real work through side effects (writing directly to HBase or Accumulo, sleeping, failing on purpose) and for tests that only inspect counters.
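Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: a map-only job whose mapper works purely through side effects, with NullOutputFormat discarding the (empty) output. The class and mapper names (NullOutputExample, SideEffectMapper) are hypothetical and not taken from any project below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class NullOutputExample {

  // Hypothetical mapper: consumes each input line for its side effects
  // (stdout here as a stand-in) and emits no key/value pairs.
  public static class SideEffectMapper
      extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      System.out.println("processed: " + value);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "null-output-example");
    job.setJarByClass(NullOutputExample.class);
    job.setMapperClass(SideEffectMapper.class);
    job.setNumReduceTasks(0);                          // map-only job
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // All map output is discarded; no output directory is created or required.
    job.setOutputFormatClass(NullOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Note that no FileOutputFormat.setOutputPath call is needed; NullOutputFormat's OutputCommitter has nothing to set up or commit.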
Example #1
Source File: TestImportTsv.java From hbase with Apache License 2.0
@Test
public void testJobConfigurationsWithDryMode() throws Exception {
  Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(tn.getNameAsString()), "hfiles");
  String INPUT_FILE = "InputFile1.csv";
  // Prepare the arguments required for the test.
  String[] argsArray = new String[] {
      "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
      "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
      "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
      "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
      tn.getNameAsString(),
      INPUT_FILE };
  assertEquals("running test job configuration failed.", 0,
      ToolRunner.run(new Configuration(util.getConfiguration()),
          new ImportTsv() {
            @Override
            public int run(String[] args) throws Exception {
              Job job = createSubmittableJob(getConf(), args);
              assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
              return 0;
            }
          }, argsArray));
  // Delete table created by createSubmittableJob.
  util.deleteTable(tn);
}
Example #2
Source File: GenerateDistCacheData.java From big-c with Apache License 2.0
@Override
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
Example #3
Source File: SleepJob.java From big-c with Apache License 2.0
public Job createJob(int numMapper, int numReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #4
Source File: TestMapCollection.java From big-c with Apache License 2.0
private static void runTest(String name, Job job) throws Exception {
  job.setNumReduceTasks(1);
  job.getConfiguration().set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  job.getConfiguration().setInt(MRJobConfig.IO_SORT_FACTOR, 1000);
  job.getConfiguration().set("fs.defaultFS", "file:///");
  job.getConfiguration().setInt("test.mapcollection.num.maps", 1);
  job.setInputFormatClass(FakeIF.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(SpillReducer.class);
  job.setMapOutputKeyClass(KeyWritable.class);
  job.setMapOutputValueClass(ValWritable.class);
  job.setSortComparatorClass(VariableComparator.class);
  LOG.info("Running " + name);
  assertTrue("Job failed!", job.waitForCompletion(false));
}
Example #5
Source File: TestTableInputFormatScanBase.java From hbase with Apache License 2.0
/**
 * Run an MR job to check that the number of mappers equals expectedNumOfSplits.
 */
protected void testNumOfSplitsMR(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits-MR";
  LOG.info("Before map/reduce startup - job " + jobName);
  JobConf c = new JobConf(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");
  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(NullOutputFormat.class);
  assertTrue("job failed!", job.waitForCompletion(true));
  // for some reason, hbase does not expose JobCounter.TOTAL_LAUNCHED_MAPS,
  // so we use TaskCounter.SHUFFLED_MAPS to get the total number of launched maps
  assertEquals("Saw the wrong count of mappers per region", expectedNumOfSplits,
      job.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS).getValue());
}
Example #6
Source File: FailJob.java From big-c with Apache License 2.0
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Example #7
Source File: TestTableInputFormat.java From hbase with Apache License 2.0
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
Example #8
Source File: CredentialsTestJob.java From big-c with Apache License 2.0
public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setNumReduceTasks(1);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #9
Source File: ReadUsingMR.java From parquet-mr with Apache License 2.0
public List<Message> read(Path parquetPath) throws Exception {
  synchronized (ReadUsingMR.class) {
    outputMessages = new ArrayList<Message>();

    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ProtoParquetInputFormat.class);
    ProtoParquetInputFormat.setInputPaths(job, parquetPath);
    if (projection != null) {
      ProtoParquetInputFormat.setRequestedProjection(job, projection);
    }
    job.setMapperClass(ReadingMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);

    WriteUsingMR.waitForJob(job);

    List<Message> result = Collections.unmodifiableList(outputMessages);
    outputMessages = null;
    return result;
  }
}
Example #10
Source File: IndexScrutinyTool.java From phoenix with Apache License 2.0
private Job configureSubmittableJob(Job job, Path outputPath,
    Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
  Configuration conf = job.getConfiguration();
  conf.setBoolean("mapreduce.job.user.classpath.first", true);
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  job.setJarByClass(IndexScrutinyTool.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
  }
  job.setMapperClass((mapperClass == null ? IndexScrutinyMapper.class : mapperClass));
  job.setNumReduceTasks(0);
  // Set the Output classes
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  TableMapReduceUtil.addDependencyJars(job);
  return job;
}
Example #11
Source File: GenerateDistCacheData.java From hadoop with Apache License 2.0
@Override
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
Example #12
Source File: SleepJob.java From hadoop with Apache License 2.0
public Job createJob(int numMapper, int numReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
    throws IOException {
  Configuration conf = getConf();
  conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  Job job = Job.getInstance(conf, "sleep");
  job.setNumReduceTasks(numReducer);
  job.setJarByClass(SleepJob.class);
  job.setMapperClass(SleepMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(SleepInputFormat.class);
  job.setPartitionerClass(SleepJobPartitioner.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Sleep job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #13
Source File: TestMapCollection.java From hadoop with Apache License 2.0
private static void runTest(String name, Job job) throws Exception {
  job.setNumReduceTasks(1);
  job.getConfiguration().set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  job.getConfiguration().setInt(MRJobConfig.IO_SORT_FACTOR, 1000);
  job.getConfiguration().set("fs.defaultFS", "file:///");
  job.getConfiguration().setInt("test.mapcollection.num.maps", 1);
  job.setInputFormatClass(FakeIF.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(SpillReducer.class);
  job.setMapOutputKeyClass(KeyWritable.class);
  job.setMapOutputValueClass(ValWritable.class);
  job.setSortComparatorClass(VariableComparator.class);
  LOG.info("Running " + name);
  assertTrue("Job failed!", job.waitForCompletion(false));
}
Example #14
Source File: TableInputFormatTest.java From hgraphdb with Apache License 2.0
private void runTestOnTable() throws InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    Configuration conf = graph.configuration().toHBaseConfiguration();
    job = Job.getInstance(conf, "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
        ProcessTimeRangeMapper.class, Text.class, Text.class, job, true, TableInputFormat.class);
    job.waitForCompletion(true);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
Example #15
Source File: FailJob.java From hadoop with Apache License 2.0
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Example #16
Source File: TestMiniCoronaFederatedJT.java From RDFS with Apache License 2.0
public void testOneRemoteJT() throws Exception {
  LOG.info("Starting testOneRemoteJT");
  String[] racks = "/rack-1".split(",");
  String[] trackers = "tracker-1".split(",");
  corona = new MiniCoronaCluster.Builder().numTaskTrackers(1).racks(racks)
      .hosts(trackers).build();
  Configuration conf = corona.createJobConf();
  conf.set("mapred.job.tracker", "corona");
  conf.set("mapred.job.tracker.class", CoronaJobTracker.class.getName());
  String locationsCsv = "tracker-1";
  conf.set("test.locations", locationsCsv);
  conf.setBoolean("mapred.coronajobtracker.forceremote", true);
  Job job = new Job(conf);
  job.setMapperClass(TstJob.TestMapper.class);
  job.setInputFormatClass(TstJob.TestInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);
  job.getConfiguration().set("io.sort.record.pct", "0.50");
  job.getConfiguration().set("io.sort.mb", "25");
  boolean success = job.waitForCompletion(true);
  assertTrue("Job did not succeed", success);
}
Example #17
Source File: CredentialsTestJob.java From hadoop with Apache License 2.0
public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setNumReduceTasks(1);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
Example #18
Source File: CassandraScanJobIT.java From titan1withtp3.1 with Apache License 2.0
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c)
    throws IOException {
  Job job = Job.getInstance(c);
  job.setJarByClass(HadoopScanMapper.class);
  job.setJobName("testPartitionedVertexScan");
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(CassandraInputFormat.class);
  return job;
}
Example #19
Source File: MetricsInputFormatTest.java From accumulo-recipes with Apache License 2.0
@Test
public void test() throws IOException, ClassNotFoundException, InterruptedException,
    AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException {
  Instance instance = new MockInstance("metricsInst");
  Connector connector = instance.getConnector("root", "".getBytes());
  AccumuloFeatureStore store = new AccumuloFeatureStore(connector);
  store.initialize();
  store.save(singleton(metric));

  Job job = new Job(new Configuration());
  job.setJarByClass(getClass());
  job.setMapperClass(TestMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(FeaturesInputFormat.class);
  FeaturesInputFormat.setInputInfo(job, "root", "".getBytes(), new Authorizations());
  FeaturesInputFormat.setQueryInfo(job, new Date(0), new Date(), TimeUnit.MINUTES,
      "group", "type", "name", MetricFeature.class);
  FeaturesInputFormat.setMockInstance(job, "metricsInst");
  job.setOutputFormatClass(NullOutputFormat.class);
  job.submit();
  job.waitForCompletion(true);

  assertEquals(metric.getGroup(), TestMapper.metric.getGroup());
  assertEquals(metric.getType(), TestMapper.metric.getType());
  assertEquals(metric.getName(), TestMapper.metric.getName());
  assertEquals(metric.getVisibility(), TestMapper.metric.getVisibility());
  assertEquals(metric.getVector(), TestMapper.metric.getVector());
}
Example #20
Source File: EventInputFormatIT.java From accumulo-recipes with Apache License 2.0
@Test
public void testNoQuery() throws Exception {
  Connector connector = accumuloMiniClusterDriver.getConnector();
  AccumuloEventStore store = new AccumuloEventStore(connector);
  event = EventBuilder.create("", UUID.randomUUID().toString(), System.currentTimeMillis())
      .attr(new Attribute("key1", "val1"))
      .attr(new Attribute("key2", false)).build();
  store.save(singleton(event));
  store.flush();
  AccumuloTestUtils.dumpTable(connector, "eventStore_shard");

  Job job = new Job(new Configuration());
  job.setJarByClass(getClass());
  job.setMapperClass(TestMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(EventInputFormat.class);
  EventInputFormat.setZooKeeperInstance(job, accumuloMiniClusterDriver.getClientConfiguration());
  EventInputFormat.setInputInfo(job, "root",
      accumuloMiniClusterDriver.getRootPassword().getBytes(), new Authorizations());
  EventInputFormat.setQueryInfo(job, new Date(System.currentTimeMillis() - 50000), new Date(),
      Collections.singleton(""));
  job.setOutputFormatClass(NullOutputFormat.class);
  job.submit();
  job.waitForCompletion(true);

  System.out.println("RESULT: " + TestMapper.entry);
  assertNotNull(TestMapper.entry);
  assertEquals(TestMapper.entry.getId(), event.getId());
  assertEquals(new HashSet<Attribute>(TestMapper.entry.getAttributes()),
      new HashSet<Attribute>(event.getAttributes()));
}
Example #21
Source File: AccumuloMrsPyramidOutputFormat.java From mrgeo with Apache License 2.0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Delegate to NullOutputFormat's no-op committer.
  return new NullOutputFormat<TileIdWritable, RasterWritable>().getOutputCommitter(context);
}
Example #22
Source File: EventInputFormatIT.java From accumulo-recipes with Apache License 2.0
@Test
public void test() throws Exception {
  Connector connector = accumuloMiniClusterDriver.getConnector();
  AccumuloEventStore store = new AccumuloEventStore(connector);
  event = EventBuilder.create("", UUID.randomUUID().toString(), System.currentTimeMillis())
      .attr(new Attribute("key1", "val1"))
      .attr(new Attribute("key2", false)).build();
  store.save(singleton(event));
  store.flush();

  Job job = new Job(new Configuration());
  job.setJarByClass(getClass());
  job.setMapperClass(TestMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(EventInputFormat.class);
  EventInputFormat.setZooKeeperInstance(job, accumuloMiniClusterDriver.getClientConfiguration());
  EventInputFormat.setInputInfo(job, "root",
      accumuloMiniClusterDriver.getRootPassword().getBytes(), new Authorizations());
  EventInputFormat.setQueryInfo(job, new Date(System.currentTimeMillis() - 50000), new Date(),
      Collections.singleton(""), QueryBuilder.create().eq("key1", "val1").build());
  job.setOutputFormatClass(NullOutputFormat.class);
  job.submit();
  job.waitForCompletion(true);

  assertNotNull(TestMapper.entry);
  assertEquals(TestMapper.entry.getId(), event.getId());
  assertTrue(TestMapper.entry.getTimestamp() - event.getTimestamp() < 50);
  assertEquals(new HashSet<Attribute>(TestMapper.entry.getAttributes()),
      new HashSet<Attribute>(event.getAttributes()));
}
Example #23
Source File: TestMiniCoronaRunJob.java From RDFS with Apache License 2.0
public void testLocality() throws Exception {
  LOG.info("Starting testOneTaskWithOneTaskTracker");
  String[] racks = "/rack-1,/rack-1,/rack-2,/rack-3".split(",");
  String[] trackers = "tracker-1,tracker-2,tracker-3,tracker-4".split(",");
  String locationsCsv = "tracker-1,tracker-1,tracker-3,tracker-3";
  corona = new MiniCoronaCluster.Builder()
      .numTaskTrackers(4)
      .racks(racks)
      .hosts(trackers)
      .build();
  Configuration conf = corona.createJobConf();
  conf.set("mapred.job.tracker", "corona");
  conf.set("mapred.job.tracker.class", CoronaJobTracker.class.getName());
  conf.set("test.locations", locationsCsv);
  Job job = new Job(conf);
  long start = System.currentTimeMillis();
  job.setMapperClass(TstJob.TestMapper.class);
  job.setInputFormatClass(TstJob.TestInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);
  job.getConfiguration().set("io.sort.record.pct", "0.50");
  job.getConfiguration().set("io.sort.mb", "25");
  boolean success = job.waitForCompletion(true);
  long end = System.currentTimeMillis();
  new ClusterManagerMetricsVerifier(corona.getClusterManager(),
      4, 0, 4, 0, 4, 0, 0, 0).verifyAll();
  LOG.info("Time spent for testMemoryLimit:" + (end - start));
  assertTrue("Job did not succeed", success);
}
Example #24
Source File: TestCorruptThriftRecords.java From parquet-mr with Apache License 2.0
protected void setupJob(Job job, Path path) throws Exception {
  job.setInputFormatClass(ParquetThriftInputFormat.class);
  ParquetThriftInputFormat.setInputPaths(job, path);
  ParquetThriftInputFormat.setThriftClass(job.getConfiguration(), StructWithUnionV2.class);
  job.setMapperClass(ReadMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
}
Example #25
Source File: EntityInputFormatIT.java From accumulo-recipes with Apache License 2.0
@Test
public void testQuery() throws Exception {
  Connector connector = accumuloMiniClusterDriver.getConnector();
  AccumuloEntityStore store = new AccumuloEntityStore(connector);
  entity = EntityBuilder.create("type", "id").attr(new Attribute("key1", "val1"))
      .attr(new Attribute("key2", false)).build();
  store.save(singleton(entity));
  store.flush();

  Job job = Job.getInstance();
  job.setJarByClass(getClass());
  job.setMapperClass(TestMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(EntityInputFormat.class);
  EntityInputFormat.setZooKeeperInstance(job, accumuloMiniClusterDriver.getClientConfiguration());
  EntityInputFormat.setInputInfo(job, "root",
      accumuloMiniClusterDriver.getRootPassword().getBytes(), new Authorizations());
  EntityInputFormat.setQueryInfo(job, Collections.singleton("type"),
      QueryBuilder.create().eq("key1", "val1").build(), DEFAULT_SHARD_BUILDER, LEXI_TYPES);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.submit();
  job.waitForCompletion(true);

  assertEquals(1, TestMapper.entities.size());
  assertEquals(TestMapper.entities.get(0).getId(), entity.getId());
  assertEquals(TestMapper.entities.get(0).getType(), entity.getType());
  assertEquals(new HashSet<Attribute>(TestMapper.entities.get(0).getAttributes()),
      new HashSet<Attribute>(entity.getAttributes()));
}
Example #26
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
Example #27
Source File: CompactionTool.java From hbase with Apache License 2.0
/**
 * Execute compaction, using a Map-Reduce job.
 */
private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
    final boolean compactOnce, final boolean major) throws Exception {
  Configuration conf = getConf();
  conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
  conf.setBoolean(CONF_COMPACT_MAJOR, major);

  Job job = new Job(conf);
  job.setJobName("CompactionTool");
  job.setJarByClass(CompactionTool.class);
  job.setMapperClass(CompactionMapper.class);
  job.setInputFormatClass(CompactionInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // add dependencies (including HBase ones)
  TableMapReduceUtil.addDependencyJars(job);

  Path stagingDir = JobUtil.getQualifiedStagingDir(conf);
  FileSystem stagingFs = stagingDir.getFileSystem(conf);
  try {
    // Create input file with the store dirs
    Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTime());
    List<Path> storeDirs = CompactionInputFormat.createInputFile(fs, stagingFs,
        inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);

    // Initialize credential for secure cluster
    TableMapReduceUtil.initCredentials(job);
    // Despite the method name this will get a delegation token for the filesystem
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        storeDirs.toArray(new Path[0]), conf);

    // Start the MR Job and wait
    return job.waitForCompletion(true) ? 0 : 1;
  } finally {
    fs.delete(stagingDir, true);
  }
}
Example #28
Source File: RowCounter.java From hbase with Apache License 2.0
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(RowCounter.class);
  Scan scan = new Scan();
  scan.setCacheBlocks(false);
  setScanFilter(scan, rowRangeList);

  for (String columnName : this.columns) {
    String family = StringUtils.substringBefore(columnName, ":");
    String qualifier = StringUtils.substringAfter(columnName, ":");
    if (StringUtils.isBlank(qualifier)) {
      scan.addFamily(Bytes.toBytes(family));
    } else {
      scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    }
  }

  if (this.expectedCount >= 0) {
    conf.setLong(EXPECTED_COUNT_KEY, this.expectedCount);
  }

  scan.setTimeRange(startTime, endTime);
  job.setOutputFormatClass(NullOutputFormat.class);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(0);
  return job;
}
Example #29
Source File: Mapper2HbaseDemo.java From bigdata-tutorial with Apache License 2.0
public int run(String[] args) throws Exception {
  String input = args[0];
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, Mapper2HbaseDemo.class.getSimpleName());
  job.setJarByClass(Mapper2HbaseDemo.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example #30
Source File: AvroConversionBaseCreator.java From datacollector with Apache License 2.0
@Override
public Job call() throws Exception {
  // We're explicitly disabling speculative execution
  conf.set("mapreduce.map.speculative", "false");
  conf.set("mapreduce.map.maxattempts", "1");

  conf.set("mapreduce.job.user.classpath.first", "true");
  conf.set("mapreduce.task.classpath.user.precedence", "true");
  conf.set("mapreduce.task.classpath.first", "true");

  addNecessaryJarsToJob(conf);

  Job job = Job.getInstance(conf);

  // IO formats
  job.setInputFormatClass(getInputFormatClass());
  job.setOutputFormatClass(NullOutputFormat.class);

  // Mapper & job output
  job.setMapperClass(getMapperClass());
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  // It's a map-only job
  job.setNumReduceTasks(0);

  // General configuration
  job.setJarByClass(getClass());

  return job;
}