org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat Java Examples

The following examples show how to use org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat. Each example is drawn from an open source project; the source file and license are noted above the code.
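Before the project examples, here is a minimal, self-contained sketch of the basic pattern: a job whose mapper emits Accumulo Key/Value pairs and whose output format writes them as RFiles suitable for bulk import. All class names, column names, and path arguments below are illustrative placeholders, not code from any of the projects featured here.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class RFileGenerationSketch {

    // illustrative mapper: turns each text line into one Key/Value pair
    static class LineMapper extends Mapper<LongWritable, Text, Key, Value> {
        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            Key key = new Key(new Text(line.toString()), new Text("fam"), new Text("qual"));
            Value value = new Value("1".getBytes(StandardCharsets.UTF_8));
            context.write(key, value);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "rfile-generation-sketch");
        job.setJarByClass(RFileGenerationSketch.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setMapperClass(LineMapper.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);

        // AccumuloFileOutputFormat writes RFiles, which require sorted keys;
        // a single identity reduce task yields one globally sorted file
        job.setNumReduceTasks(1);
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        AccumuloFileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}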
Example #1
Source File: FlinkEnvManager.java    From OSTMap with Apache License 2.0
/**
 * Creates an output format for writing data from a Flink DataSet to Accumulo.
 *
 * @return a Flink Hadoop-compatibility wrapper around AccumuloOutputFormat
 * @throws AccumuloSecurityException if the connector credentials are rejected
 * @throws IOException if the Hadoop Job cannot be created
 */
public HadoopOutputFormat<Text, Mutation> getHadoopOF()
        throws AccumuloSecurityException, IOException {

    if (job == null) {
        job = Job.getInstance(new Configuration(), jobName);
    }

    // credentials for writing to Accumulo
    AccumuloOutputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));

    // locate the Accumulo instance through ZooKeeper
    ClientConfiguration clientConfig = new ClientConfiguration()
            .withInstance(accumuloInstanceName)
            .withZkHosts(accumuloZookeeper);
    AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
    AccumuloOutputFormat.setDefaultTableName(job, outTable);

    // the job is also given a file output path via AccumuloFileOutputFormat
    AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp"));

    return new HadoopOutputFormat<>(new AccumuloOutputFormat(), job);
}
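
The returned wrapper can then be used as a sink for a Flink DataSet. A brief, hypothetical usage fragment follows; manager, mutations, and env are assumed to exist in the surrounding code and are not part of the original example.

// hypothetical usage: "manager" is a configured FlinkEnvManager, "env" the
// ExecutionEnvironment, and "mutations" a DataSet<Tuple2<Text, Mutation>>
HadoopOutputFormat<Text, Mutation> format = manager.getHadoopOF();
mutations.output(format); // each tuple's Mutation is written to the default table
env.execute("write to Accumulo");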
 
Example #2
Source File: BulkIngestInputGenerationIT.java    From geowave with Apache License 2.0
@Override
public int run(final String[] args) throws Exception {

  final Configuration conf = getConf();
  conf.set("fs.defaultFS", "file:///");

  final Job job = Job.getInstance(conf, JOB_NAME);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
  FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

  job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
  job.setReducerClass(Reducer.class); // the base Reducer class passes pairs through unchanged (identity reducer)

  job.setInputFormatClass(GeonamesDataFileInputFormat.class);
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);

  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(Value.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(Value.class);

  // RFiles must be written in sorted Key order: a single reduce task lets the
  // shuffle produce one globally sorted output file, and disabling speculative
  // execution avoids competing attempts writing the same output
  job.setNumReduceTasks(1);
  job.setSpeculativeExecution(false);

  final boolean result = job.waitForCompletion(true);

  mapInputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();

  mapOutputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

  return result ? 0 : 1;
}
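
The mapper above, SimpleFeatureToAccumuloKeyValueMapper, is GeoWave's own class and its source is not shown here. As a rough illustration of the output contract such a mapper must satisfy (Accumulo Key/Value pairs ready for AccumuloFileOutputFormat), here is a hypothetical stand-in, not GeoWave's implementation; the input types and the tab-separated record layout are assumptions.

// hypothetical mapper with the same output contract as
// SimpleFeatureToAccumuloKeyValueMapper: emits Accumulo Key/Value pairs
static class ToKeyValueMapper extends Mapper<LongWritable, Text, Key, Value> {
  @Override
  protected void map(LongWritable offset, Text record, Context context)
      throws IOException, InterruptedException {
    // assume tab-separated "id<TAB>name" records
    String[] fields = record.toString().split("\t");
    Key key = new Key(new Text(fields[0]), new Text("attr"), new Text("name"));
    context.write(key, new Value(fields[1].getBytes(StandardCharsets.UTF_8)));
  }
}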
 
Example #3
Source File: FluoFileOutputFormatIT.java    From fluo with Apache License 2.0
@Test
public void testImportFile() throws Exception {

  File inDir = new File(tempFolder.getRoot(), "in");
  Assert.assertTrue(inDir.mkdir());
  File outDir = new File(tempFolder.getRoot(), "out");
  File failDir = new File(tempFolder.getRoot(), "fail");
  Assert.assertTrue(failDir.mkdir());

  // generate some data for map reduce to read
  PrintWriter writer =
      new PrintWriter(new File(inDir, "file1.txt"), StandardCharsets.UTF_8.name());
  writer.println("a,b,c,1");
  writer.println("d,b,c,2");
  writer.println("foo,moo,moo,90");
  writer.close();

  // run map reduce job to generate rfiles
  JobConf jconf = new JobConf();
  jconf.set("mapred.job.tracker", "true");
  jconf.set("fs.defaultFS", "file:///");
  @SuppressWarnings("deprecation")
  Job job = new Job(jconf);
  job.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, inDir.toURI().toString());
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);
  AccumuloFileOutputFormat.setOutputPath(job, new Path(outDir.toURI()));
  job.setMapperClass(TestMapper.class);
  job.setNumReduceTasks(0);
  Assert.assertTrue(job.waitForCompletion(false));

  // bulk import rfiles
  aClient.tableOperations().importDirectory(table, outDir.toString(), failDir.toString(), false);

  // read and update data using transactions
  TestTransaction tx1 = new TestTransaction(env);
  TestTransaction tx2 = new TestTransaction(env);

  Assert.assertEquals("1", tx1.gets("a", new Column("b", "c")));
  Assert.assertEquals("2", tx1.gets("d", new Column("b", "c")));
  Assert.assertEquals("90", tx1.gets("foo", new Column("moo", "moo")));

  tx1.set("a", new Column("b", "c"), "3");
  tx1.delete("d", new Column("b", "c"));

  tx1.done();

  // tx2 started before tx1 committed, so snapshot isolation hides tx1's changes
  Assert.assertEquals("1", tx2.gets("a", new Column("b", "c")));
  Assert.assertEquals("2", tx2.gets("d", new Column("b", "c")));
  Assert.assertEquals("90", tx2.gets("foo", new Column("moo", "moo")));

  TestTransaction tx3 = new TestTransaction(env);

  // tx3 started after tx1 committed, so its snapshot includes tx1's changes
  Assert.assertEquals("3", tx3.gets("a", new Column("b", "c")));
  Assert.assertNull(tx3.gets("d", new Column("b", "c")));
  Assert.assertEquals("90", tx3.gets("foo", new Column("moo", "moo")));
}
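
The TestMapper referenced in the job setup is not shown in the excerpt. For Fluo transactions to read the bulk-imported data, the Key/Value pairs must be written in Fluo's internal encoding; the fluo-mapreduce module provides a FluoKeyValueGenerator helper for this. The sketch below is a plausible reconstruction, with the helper's fluent API assumed from memory; the project's actual mapper may differ.

// hypothetical reconstruction of TestMapper, assuming Fluo's
// FluoKeyValueGenerator API; the project's actual mapper may differ
public static class TestMapper extends Mapper<LongWritable, Text, Key, Value> {

  private final FluoKeyValueGenerator generator = new FluoKeyValueGenerator();

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    // each input line looks like "row,family,qualifier,value"
    String[] fields = line.toString().split(",");
    generator.setRow(fields[0])
        .setColumn(new Column(fields[1], fields[2]))
        .setValue(fields[3]);
    // one logical cell expands to the Key/Value pairs Fluo expects on disk
    for (FluoKeyValue kv : generator.getKeyValues()) {
      context.write(kv.getKey(), kv.getValue());
    }
  }
}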