Java Code Examples for org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableReducerJob()
The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#initTableReducerJob().
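Before the examples, a minimal sketch of the two overloads they exercise may help. Everything in it (the TableReducerSketch class, MyTableReducer, and the "example_table" name) is a hypothetical placeholder and not code from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class TableReducerSketch {

    // Placeholder reducer class; several examples below instead pass null and run map-only jobs.
    public static class MyTableReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "table-reducer-sketch");
        job.setJarByClass(TableReducerSketch.class);

        // Basic form: wires up TableOutputFormat so reducer output (Put/Delete mutations)
        // is written to the named HBase table:
        // TableMapReduceUtil.initTableReducerJob("example_table", MyTableReducer.class, job);

        // Extended form: the extra arguments are partitioner, quorumAddress, serverClass,
        // serverImpl and addDependencyJars. The examples below pass addDependencyJars=false
        // for local runs and leave it true (the default) when submitting a packaged jar to a cluster.
        TableMapReduceUtil.initTableReducerJob("example_table", MyTableReducer.class, job,
                null, null, null, null, false);

        // Mapper and input-path setup would follow here, as in the examples below.
    }
}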
Example 1
Source File: AnalyserLogDataRunner.java From BigDataPlatform with GNU General Public License v3.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
        null, job, null, null, null, null, true);
    // 2. Run locally (requires addDependencyJars to be false)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
    //     null, job, null, null, null, null, false);

    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
Example 2
Source File: AnalyserLogDataRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
        null, job, null, null, null, null, false);

    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
Example 3
Source File: WCRunner.java From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(true);
    conf.set("hbase.zookeeper.quorum", "node04,node02,node03");
    conf.set("mapreduce.app-submission.cross-platform", "true");
    conf.set("mapreduce.framework.name", "local");

    // Create the Job
    Job job = Job.getInstance(conf);
    job.setJarByClass(WCRunner.class);

    // Set the mapper class
    job.setMapperClass(WCMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Set the reducer class
    // job.setReducerClass();
    // TableMapReduceUtil.initTableMapperJob();
    TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job,
        null, null, null, null, false);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Put.class);

    // Specify the HDFS directory that holds the input data
    FileInputFormat.addInputPath(job, new Path("/wc/wc"));

    job.waitForCompletion(true);
}
Example 4
Source File: LeastRecentlyUsedPruner.java From metron with Apache License 2.0
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
    Scan scan = new Scan();
    if (cf != null) {
        scan.addFamily(Bytes.toBytes(cf));
    }
    scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false);  // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(
        sourceTable,         // input table
        scan,                // Scan instance to control CF and attribute selection
        PrunerMapper.class,  // mapper class
        null,                // mapper output key
        null,                // mapper output value
        job);
    TableMapReduceUtil.initTableReducerJob(
        sourceTable,         // output table
        null,                // reducer class
        job);
}
Example 5
Source File: UpdateClusterJob.java From recsys-offline with Apache License 2.0
public void run() {
    try {
        Job job = Job.getInstance(HBaseContext.config, "UpdateClusterJob");
        job.setJarByClass(UpdateClusterJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);
        scan.setCacheBlocks(false);

        TableMapReduceUtil.initTableMapperJob(
            Constants.hbase_cluster_model_table, scan,
            HBaseReadMapper.class, Text.class, Text.class, job);
        TableMapReduceUtil.initTableReducerJob(
            Constants.hbase_cluster_model_table,
            HBaseWriteReducer.class, job);
        job.setNumReduceTasks(4);

        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 6
Source File: AnalyserLogDataRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");

    // Code needed when submitting the job locally but running it on the cluster
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of local-submit / cluster-run code

    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Run locally (requires addDependencyJars to be false)
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
        null, job, null, null, null, null, false);

    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
Example 7
Source File: AnalyserLogDataRunner.java From BigDataArchitect with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");

    // Code needed when submitting the job locally but running it on the cluster
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of local-submit / cluster-run code

    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Run locally (requires addDependencyJars to be false)
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
        null, job, null, null, null, null, false);

    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
Example 8
Source File: WordCountHBase.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount-hbase <in> [<in>...] <table-name>");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }

    TableName tableName = TableName.valueOf(otherArgs[otherArgs.length - 1]);
    try {
        CreateTable.createTable(tableName, conf,
            Collections.singletonList(Bytes.toString(COLUMN_FAMILY)));
    } catch (Exception e) {
        LOG.error("Could not create the table.", e);
    }

    job.setJarByClass(WordCountHBase.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputValueClass(IntWritable.class);

    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), MyTableReducer.class, job);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 9
Source File: PrepareClusterJob.java From recsys-offline with Apache License 2.0
public void run() {
    try {
        Job job = Job.getInstance(HBaseContext.config, "ClusterPrepareJob");
        job.setJarByClass(PrepareClusterJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);
        scan.setCacheBlocks(false);
        scan.addColumn(Constants.hbase_column_family.getBytes(),
            Constants.hbase_column_yearrate.getBytes());
        scan.addColumn(Constants.hbase_column_family.getBytes(),
            Constants.hbase_column_repaylimittime.getBytes());
        scan.addColumn(Constants.hbase_column_family.getBytes(),
            Constants.hbase_column_progress.getBytes());

        Filter filter = new SingleColumnValueFilter(
            Bytes.toBytes(Constants.hbase_column_family),
            Bytes.toBytes(Constants.hbase_column_progress),
            CompareOp.NOT_EQUAL, Bytes.toBytes("100"));
        scan.setFilter(filter);

        TableMapReduceUtil.initTableMapperJob(Constants.hbase_p2p_table, scan,
            HBaseReadMapper.class, Text.class, Text.class, job);
        TableMapReduceUtil.initTableReducerJob(
            Constants.hbase_cluster_model_table,
            HBaseWriteReducer.class, job);
        job.setNumReduceTasks(1);

        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error with job!");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 10
Source File: ImportTsv.java From learning-hadoop with Apache License 2.0
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY,
            new String(Base64.encodeBytes(actualSeparator.getBytes())));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);

    String inputCodec = conf.get(INPUT_LZO_KEY);
    if (inputCodec == null) {
        FileInputFormat.setMaxInputSplitSize(job, 67108864L); // max split size = 64m
        job.setInputFormatClass(TextInputFormat.class);
    } else {
        if (inputCodec.equalsIgnoreCase("lzo"))
            job.setInputFormatClass(LzoTextInputFormat.class);
        else {
            usage("not supported compression codec!");
            System.exit(-1);
        }
    }
    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
Example 11
Source File: JobFileRawLoader.java From hraven with Apache License 2.0
/**
 * @param myHBaseConf to use to create and run the job. Should be an HBase configuration.
 * @param input path to the processFile
 * @param totalJobCount the total number of jobs that need to be run in this batch. Used in job name.
 * @return whether all job confs were loaded properly.
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private boolean runRawLoaderJob(Configuration myHBaseConf, String input, int totalJobCount)
        throws IOException, InterruptedException, ClassNotFoundException {
    boolean success;

    // Turn off speculative execution.
    // Note: must be BEFORE the job construction with the new mapreduce API.
    myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false);

    // Set up job
    Job job = new Job(myHBaseConf, getJobName(totalJobCount));
    job.setJarByClass(JobFileRawLoader.class);

    Path inputPath = new Path(input);

    if (hdfs.exists(inputPath)) {
        // Set input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        SequenceFileInputFormat.setInputPaths(job, inputPath);

        job.setMapperClass(JobFileRawLoaderMapper.class);

        // Set the output format to push data into HBase.
        job.setOutputFormatClass(TableOutputFormat.class);
        TableMapReduceUtil.initTableReducerJob(Constants.HISTORY_RAW_TABLE, null, job);

        job.setOutputKeyClass(JobFileRawLoaderMapper.getOutputKeyClass());
        job.setOutputValueClass(JobFileRawLoaderMapper.getOutputValueClass());

        // This is a map-only class, skip reduce step
        job.setNumReduceTasks(0);

        // Run the job
        success = job.waitForCompletion(true);

        if (success) {
            success = hdfs.delete(inputPath, false);
        }
    } else {
        System.err.println("Unable to find processFile: " + inputPath);
        success = false;
    }
    return success;
}
Example 12
Source File: HBaseSinkMapReduce.java From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    HBaseWriter.createTableAndColumn(conf, STOCKS_IMPORT_TABLE_NAME,
        HBaseWriter.STOCK_DETAILS_COLUMN_FAMILY_AS_BYTES);

    Job job = new Job(conf);
    job.setJarByClass(HBaseSinkMapReduce.class);

    TableMapReduceUtil.initTableReducerJob(
        STOCKS_IMPORT_TABLE_NAME,
        IdentityTableReducer.class,
        job);

    job.setMapperClass(MapClass.class);
    job.setMapOutputKeyClass(StockPriceWritable.class);
    job.setMapOutputValueClass(Put.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}