org.apache.hadoop.hbase.mapreduce.KeyValueSerialization Java Examples
The following examples show how to use
org.apache.hadoop.hbase.mapreduce.KeyValueSerialization.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HFileOutputFormat3.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(cls); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); // Use table's region boundaries for TOP split points. LOG.info("Looking up current regions for table " + tableDescriptor.getTableName()); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(conf, tableDescriptor); configureBloomType(tableDescriptor, conf); configureBlockSize(tableDescriptor, conf); configureDataBlockEncoding(tableDescriptor, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + regionLocator.getName() + " output configured."); }
Example #2
Source File: HFileOutputFormat3.java From kylin with Apache License 2.0 | 4 votes |
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException, UnsupportedEncodingException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(cls); // Based on the configured map output class, set the correct reducer to properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { // record this table name for creating writer by favored nodes LOG.info("bulkload locality sensitive enabled"); conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString()); } // Use table's region boundaries for TOP split points. LOG.info("Looking up current regions for table " + tableDescriptor.getTableName()); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(conf, tableDescriptor); configureBloomType(tableDescriptor, conf); configureBlockSize(tableDescriptor, conf); configureDataBlockEncoding(tableDescriptor, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + regionLocator.getName() + " output configured."); }
Example #3
Source File: MultiHfileOutputFormat.java From phoenix with Apache License 2.0 | 4 votes |
/** * Configures the job for MultiHfileOutputFormat. * @param job * @param tablesToBeLoaded * @throws IOException */ @SuppressWarnings("deprecation") public static void configureIncrementalLoad(Job job, List<TargetTableRef> tablesToBeLoaded) throws IOException { Configuration conf = job.getConfiguration(); job.setOutputFormatClass(MultiHfileOutputFormat.class); conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); // tableStartKeys for all tables. Set<TableRowkeyPair> tablesStartKeys = Sets.newTreeSet(); for(TargetTableRef table : tablesToBeLoaded) { final String tableName = table.getPhysicalName(); try(Connection hbaseConn = ConnectionFactory.createConnection(conf);){ Set<TableRowkeyPair> startKeys = getRegionStartKeys(tableName, hbaseConn.getRegionLocator(TableName.valueOf(tableName))); tablesStartKeys.addAll(startKeys); TableDescriptor tableDescriptor = hbaseConn.getTable(TableName.valueOf(tableName)).getDescriptor(); String compressionConfig = configureCompression(tableDescriptor); String bloomTypeConfig = configureBloomType(tableDescriptor); String blockSizeConfig = configureBlockSize(tableDescriptor); String blockEncodingConfig = configureDataBlockEncoding(tableDescriptor); Map<String,String> tableConfigs = Maps.newHashMap(); if(StringUtils.isNotBlank(compressionConfig)) { tableConfigs.put(COMPRESSION_FAMILIES_CONF_KEY, compressionConfig); } if(StringUtils.isNotBlank(bloomTypeConfig)) { tableConfigs.put(BLOOM_TYPE_FAMILIES_CONF_KEY,bloomTypeConfig); } if(StringUtils.isNotBlank(blockSizeConfig)) { tableConfigs.put(BLOCK_SIZE_FAMILIES_CONF_KEY,blockSizeConfig); } if(StringUtils.isNotBlank(blockEncodingConfig)) { tableConfigs.put(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,blockEncodingConfig); } table.setConfiguration(tableConfigs); final String tableDefns = TargetTableRefFunctions.TO_JSON.apply(table); // set the table definition in the config to be used during the RecordWriter.. conf.set(tableName, tableDefns); TargetTableRef tbl = TargetTableRefFunctions.FROM_JSON.apply(tableDefns); LOGGER.info(" the table logical name is "+ tbl.getLogicalName()); } } LOGGER.info("Configuring " + tablesStartKeys.size() + " reduce partitions to match current region count"); job.setNumReduceTasks(tablesStartKeys.size()); configurePartitioner(job, tablesStartKeys); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); }