org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil Java Examples
The following examples show how to use
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.
Each example notes the original project and source file it was taken from.
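For orientation, here is a minimal sketch of the pattern most of the examples below share: a Scan feeds a TableMapper via initTableMapperJob, a TableReducer writes mutations back via initTableReducerJob, and dependency jars and credentials are shipped with the job. This is only an illustrative sketch, not code from any of the listed projects; the class name TableMapReduceUtilSketch and the table names "source_table" and "target_table" are placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.mapreduce.Job;

public class TableMapReduceUtilSketch {

  // Builds a table-in/table-out MapReduce job. "source_table" and "target_table"
  // are placeholder names; pass any TableMapper/TableReducer implementations.
  @SuppressWarnings("rawtypes")
  public static Job createJob(Class<? extends TableMapper> mapperClass,
      Class<? extends TableReducer> reducerClass) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "table-mr-sketch");
    job.setJarByClass(TableMapReduceUtilSketch.class);

    // The Scan controls which families/columns the mapper sees and how rows are fetched.
    Scan scan = new Scan();
    scan.setCaching(500);        // fetch rows in batches rather than one at a time
    scan.setCacheBlocks(false);  // a full scan shouldn't churn the region server block cache

    // Read "source_table" through the mapper; null output key/value classes are
    // allowed when the reducer settings determine them (see Example #2 below).
    TableMapReduceUtil.initTableMapperJob("source_table", scan, mapperClass, null, null, job);

    // Write the reducer's Puts/Deletes into "target_table".
    TableMapReduceUtil.initTableReducerJob("target_table", reducerClass, job);

    // Ship HBase dependency jars and (on secure clusters) delegation tokens with the job.
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    return job;
  }
}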
Example #1
Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0
protected Job initJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, this.jobName);
  job.setJarByClass(this.runnerClass);
  // Run locally
  // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
  //     this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
  TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
      this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
  // Run on the cluster: submit locally or as a packaged jar
  // TableMapReduceUtil.initTableMapperJob(initScans(job),
  //     this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
  //     job);
  job.setReducerClass(this.reducerClass);
  job.setOutputKeyClass(this.outputKeyClass);
  job.setOutputValueClass(this.outputValueClass);
  job.setOutputFormatClass(this.outputFormatClass);
  return job;
}
Example #2
Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
  Scan scan = new Scan();
  if (cf != null) {
    scan.addFamily(Bytes.toBytes(cf));
  }
  scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false);  // don't set to true for MR jobs
  // set other scan attrs

  TableMapReduceUtil.initTableMapperJob(
      sourceTable,        // input table
      scan,               // Scan instance to control CF and attribute selection
      PrunerMapper.class, // mapper class
      null,               // mapper output key
      null,               // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      sourceTable, // output table
      null,        // reducer class
      job);
}
Example #3
Source File: IntegrationTestLoadAndVerify.java From hbase with Apache License 2.0
protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
Example #4
Source File: TableInputFormatTest.java From hgraphdb with Apache License 2.0
private void runTestOnTable() throws InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    Configuration conf = graph.configuration().toHBaseConfiguration();
    job = Job.getInstance(conf, "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
        ProcessTimeRangeMapper.class, Text.class, Text.class, job, true, TableInputFormat.class);
    job.waitForCompletion(true);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    if (job != null) {
      FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
Example #5
Source File: IntegrationTestLoadAndVerify.java From hbase with Apache License 2.0
protected void doVerify(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(tableDescriptor.getTableName().getNameAsString(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example #6
Source File: CellCounter.java From cloud-bigtable-examples with Apache License 2.0
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
Example #7
Source File: HFileOutputFormat3.java From kylin with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
  Configuration conf = job.getConfiguration();

  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(HFileOutputFormat3.class);

  // Set compression algorithms based on column families
  configureCompression(conf, table.getTableDescriptor());
  configureBloomType(table.getTableDescriptor(), conf);
  configureBlockSize(table.getTableDescriptor(), conf);
  HTableDescriptor tableDescriptor = table.getTableDescriptor();
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + table.getName() + " output configured.");
}
Example #8
Source File: UpdateClusterJob.java From recsys-offline with Apache License 2.0
public void run() {
  try {
    Job job = Job.getInstance(HBaseContext.config, "UpdateClusterJob");
    job.setJarByClass(UpdateClusterJob.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(Constants.hbase_cluster_model_table, scan,
        HBaseReadMapper.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob(Constants.hbase_cluster_model_table,
        HBaseWriteReducer.class, job);
    job.setNumReduceTasks(4);

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example #9
Source File: IndexScrutinyTool.java From phoenix with Apache License 2.0
private Job configureSubmittableJob(Job job, Path outputPath,
    Class<IndexScrutinyMapperForTest> mapperClass) throws Exception {
  Configuration conf = job.getConfiguration();
  conf.setBoolean("mapreduce.job.user.classpath.first", true);
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  job.setJarByClass(IndexScrutinyTool.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) {
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
  }
  job.setMapperClass((mapperClass == null ? IndexScrutinyMapper.class : mapperClass));
  job.setNumReduceTasks(0);
  // Set the Output classes
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  TableMapReduceUtil.addDependencyJars(job);
  return job;
}
Example #10
Source File: IndexTool.java From phoenix with Apache License 2.0
private Job configureSubmittableJobUsingDirectApi(Job job) throws Exception {
  job.setReducerClass(PhoenixIndexImportDirectReducer.class);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  // Set the Physical Table name for use in DirectHTableWriter#write(Mutation)
  conf.set(TableOutputFormat.OUTPUT_TABLE,
      PhoenixConfigurationUtil.getPhysicalTableName(job.getConfiguration()));
  // Set the Output classes
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  TableMapReduceUtil.addDependencyJars(job);
  job.setNumReduceTasks(1);
  return job;
}
Example #11
Source File: HBaseBulkImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void jobSetup(Job job) throws IOException, ImportException {
  super.jobSetup(job);

  // We shouldn't have gotten here if the bulk load dir is not set,
  // so let's throw an ImportException
  if (getContext().getDestination() == null) {
    throw new ImportException("Can't run HBaseBulkImportJob without a "
        + "valid destination directory.");
  }

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Preconditions.class);
  FileOutputFormat.setOutputPath(job, getContext().getDestination());
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  HFileOutputFormat.configureIncrementalLoad(job, hTable);
}
Example #12
Source File: HFileOutputFormat3.java From kylin-on-parquet-v2 with Apache License 2.0
public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
  Configuration conf = job.getConfiguration();

  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(HFileOutputFormat3.class);

  // Set compression algorithms based on column families
  configureCompression(conf, table.getTableDescriptor());
  configureBloomType(table.getTableDescriptor(), conf);
  configureBlockSize(table.getTableDescriptor(), conf);
  HTableDescriptor tableDescriptor = table.getTableDescriptor();
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + table.getName() + " output configured.");
}
Example #13
Source File: IntegrationTestWithCellVisibilityLoadAndVerify.java From hbase with Apache License 2.0
private Job doVerify(Configuration conf, TableDescriptor tableDescriptor, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(tableDescriptor.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));
  return job;
}
Example #14
Source File: WCRunner.java From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration(true);
  conf.set("hbase.zookeeper.quorum", "node04,node02,node03");
  conf.set("mapreduce.app-submission.cross-platform", "true");
  conf.set("mapreduce.framework.name", "local");

  // Create the job object
  Job job = Job.getInstance(conf);
  job.setJarByClass(WCRunner.class);

  // Set the mapper class
  job.setMapperClass(WCMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);

  // Set the reducer class
  // job.setReducerClass();
  // TableMapReduceUtil.initTableMapperJob();
  TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Put.class);

  // Specify the HDFS directory holding the input data
  FileInputFormat.addInputPath(job, new Path("/wc/wc"));

  job.waitForCompletion(true);
}
Example #15
Source File: TransformerBaseRunner.java From BigDataArchitect with Apache License 2.0
/**
 * Creates the job.
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
  Job job = Job.getInstance(conf, this.jobName);
  job.setJarByClass(this.runnerClass);
  // Run locally
  TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass,
      this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
  // Run on the cluster: submit locally or as a packaged jar
  // TableMapReduceUtil.initTableMapperJob(initScans(job),
  //     this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
  //     job);
  job.setReducerClass(this.reducerClass);
  job.setOutputKeyClass(this.outputKeyClass);
  job.setOutputValueClass(this.outputValueClass);
  job.setOutputFormatClass(this.outputFormatClass);
  return job;
}
Example #16
Source File: AnalyserLogDataRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  this.processArgs(conf, args);

  Job job = Job.getInstance(conf, "analyser_logdata");

  job.setJarByClass(AnalyserLogDataRunner.class);
  job.setMapperClass(AnalyserLogDataMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Put.class);

  TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
      null, job, null, null, null, null, false);
  job.setNumReduceTasks(0);

  // Set the input paths
  this.setJobInputPaths(job);
  return job.waitForCompletion(true) ? 0 : -1;
}
Example #17
Source File: JobFileProcessor.java From hraven with Apache License 2.0
/**
 * @param conf to use to create and run the job
 * @param scan to be used to scan the raw table.
 * @param totalJobCount the total number of jobs that need to be run in this
 *          batch. Used in job name.
 * @return The job to be submitted to the cluster.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount)
    throws IOException {
  Configuration confClone = new Configuration(conf);

  // Turn off speculative execution.
  // Note: must be BEFORE the job construction with the new mapreduce API.
  confClone.setBoolean("mapred.map.tasks.speculative.execution", false);

  // Set up job
  Job job = new Job(confClone, getJobName(totalJobCount));

  // This is a map-only class, skip reduce step
  job.setNumReduceTasks(0);
  job.setJarByClass(JobFileProcessor.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);

  TableMapReduceUtil.initTableMapperJob(Constants.HISTORY_RAW_TABLE, scan,
      JobFileTableMapper.class, JobFileTableMapper.getOutputKeyClass(),
      JobFileTableMapper.getOutputValueClass(), job);

  return job;
}
Example #18
Source File: AnalyserLogDataRunner.java From BigDataPlatform with GNU General Public License v3.0
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  this.processArgs(conf, args);

  Job job = Job.getInstance(conf, "analyser_logdata");
  job.setJarByClass(AnalyserLogDataRunner.class);
  job.setMapperClass(AnalyserLogDataMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Put.class);

  // Configure the reducer
  // 1. Run on the cluster as a packaged jar (addDependencyJars must be true, the default)
  // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
  TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
      null, job, null, null, null, null, true);
  // 2. Run locally as a packaged jar
  // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job,
  //     null, null, null, null, false);

  // Set the input paths
  job.setNumReduceTasks(0);
  this.setJobInputPaths(job);
  return job.waitForCompletion(true) ? 0 : -1;
}
Example #19
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
  createSchema();
  job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
Example #20
Source File: WordCountHBase.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
    System.exit(2);
  }

  Job job = Job.getInstance(conf, "word count");

  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }

  TableName tableName = TableName.valueOf(otherArgs[otherArgs.length - 1]);

  try {
    CreateTable.createTable(tableName, conf,
        Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
  } catch (Exception e) {
    LOG.error("Could not create the table.", e);
  }

  job.setJarByClass(WordCountHBase.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setMapOutputValueClass(IntWritable.class);

  TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #21
Source File: PhoenixConfigurationUtil.java From phoenix with Apache License 2.0
public static void loadHBaseConfiguration(Job job) throws IOException {
  // load hbase-site.xml
  Configuration hbaseConf = HBaseConfiguration.create();
  for (Map.Entry<String, String> entry : hbaseConf) {
    if (job.getConfiguration().get(entry.getKey()) == null) {
      job.getConfiguration().set(entry.getKey(), entry.getValue());
    }
  }
  // In order to have Phoenix working on a secured cluster
  TableMapReduceUtil.initCredentials(job);
}
Example #22
Source File: MapreduceDependencyClasspathTool.java From hbase with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  if (args.length > 0) {
    System.err.println("Usage: hbase mapredcp [-Dtmpjars=...]");
    System.err.println("  Construct a CLASSPATH containing dependency jars required to run a mapreduce");
    System.err.println("  job. By default, includes any jars detected by TableMapReduceUtils. Provide");
    System.err.println("  additional entries by specifying a comma-separated list in tmpjars.");
    return 0;
  }

  TableMapReduceUtil.addHBaseDependencyJars(getConf());
  System.out.println(TableMapReduceUtil.buildDependencyClasspath(getConf()));
  return 0;
}
Example #23
Source File: PerformanceEvaluation.java From hbase with Apache License 2.0
/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start up the job, write out an input file with instruction
 * per client regards which row they are to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Example #24
Source File: UpdateStatisticsTool.java From phoenix with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  parseArgs(args);
  preJobTask();
  configureJob();
  TableMapReduceUtil.initCredentials(job);
  int ret = runJob();
  postJobTask();
  return ret;
}
Example #25
Source File: UpdateStatisticsTool.java From phoenix with Apache License 2.0
private void configureJob() throws Exception {
  job = Job.getInstance(getConf(), "UpdateStatistics-" + tableName + "-" + snapshotName);
  PhoenixMapReduceUtil.setInput(job, NullDBWritable.class, snapshotName, tableName, restoreDir);

  PhoenixConfigurationUtil.setMRJobType(job.getConfiguration(), MRJobType.UPDATE_STATS);

  // DO NOT allow mapper splits using statistics since it may result into many smaller chunks
  PhoenixConfigurationUtil.setSplitByStats(job.getConfiguration(), false);

  job.setJarByClass(UpdateStatisticsTool.class);
  job.setMapperClass(TableSnapshotMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);
  job.setPriority(this.jobPriority);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), PhoenixConnection.class,
      Chronology.class, CharStream.class, TransactionSystemClient.class,
      TransactionNotInProgressException.class, ZKClient.class, DiscoveryServiceClient.class,
      ZKDiscoveryService.class, Cancellable.class, TTransportException.class, SpanReceiver.class,
      TransactionProcessor.class, Gauge.class, MetricRegistriesImpl.class);
  LOGGER.info("UpdateStatisticsTool running for: " + tableName
      + " on snapshot: " + snapshotName + " with restore dir: " + restoreDir);
}
Example #26
Source File: PrepareClusterJob.java From recsys-offline with Apache License 2.0
public void run() {
  try {
    Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
    job.setJarByClass(PrepareClusterJob.class);

    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_yearrate.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_repaylimittime.getBytes());
    scan.addColumn(Constants.hbase_column_family.getBytes(),
        Constants.hbase_column_progress.getBytes());

    Filter filter = new SingleColumnValueFilter(
        Bytes.toBytes(Constants.hbase_column_family),
        Bytes.toBytes(Constants.hbase_column_progress),
        CompareOp.NOT_EQUAL, Bytes.toBytes("100"));
    scan.setFilter(filter);

    TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table, scan,
        HBaseReadMapper.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob(Constants.hbase_cluster_model_table,
        HBaseWriteReducer.class, job);
    job.setNumReduceTasks(1);

    boolean b = job.waitForCompletion(true);
    if (!b) {
      throw new IOException("error with job!");
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
Example #27
Source File: IntegrationTestBigLinkedList.java From hbase with Apache License 2.0
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  job.setMapperClass(Mapper.class); // identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
Example #28
Source File: IntegrationTestBigLinkedListWithVisibility.java From hbase with Apache License 2.0
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  job = new Job(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);

  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
Example #29
Source File: CompactionTool.java From hbase with Apache License 2.0
/**
 * Execute compaction, using a Map-Reduce job.
 */
private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
    final boolean compactOnce, final boolean major) throws Exception {
  Configuration conf = getConf();
  conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
  conf.setBoolean(CONF_COMPACT_MAJOR, major);

  Job job = new Job(conf);
  job.setJobName("CompactionTool");
  job.setJarByClass(CompactionTool.class);
  job.setMapperClass(CompactionMapper.class);
  job.setInputFormatClass(CompactionInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // add dependencies (including HBase ones)
  TableMapReduceUtil.addDependencyJars(job);

  Path stagingDir = JobUtil.getQualifiedStagingDir(conf);
  FileSystem stagingFs = stagingDir.getFileSystem(conf);
  try {
    // Create input file with the store dirs
    Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTime());
    List<Path> storeDirs = CompactionInputFormat.createInputFile(fs, stagingFs,
        inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);

    // Initialize credential for secure cluster
    TableMapReduceUtil.initCredentials(job);
    // Despite the method name this will get delegation token for the filesystem
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        storeDirs.toArray(new Path[0]), conf);

    // Start the MR Job and wait
    return job.waitForCompletion(true) ? 0 : 1;
  } finally {
    fs.delete(stagingDir, true);
  }
}
Example #30
Source File: HBaseViewKeyInputFormat.java From kite with Apache License 2.0
private TableInputFormat getDelegate(Configuration conf) throws IOException {
  TableInputFormat delegate = new TableInputFormat();
  String tableName = HBaseMetadataProvider.getTableName(dataset.getName());
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  if (view != null) {
    Job tempJob = new Job();
    Scan scan = ((BaseEntityScanner) view.newEntityScanner()).getScan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, TableMapper.class, null, null, tempJob);
    Configuration tempConf = Hadoop.JobContext.getConfiguration.invoke(tempJob);
    conf.set(SCAN, tempConf.get(SCAN));
  }
  delegate.setConf(conf);
  return delegate;
}