org.apache.hive.hcatalog.data.DefaultHCatRecord Java Examples
The following examples show how to use
org.apache.hive.hcatalog.data.DefaultHCatRecord.
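Before the project examples, here is a minimal standalone sketch of the two ways the examples below populate a DefaultHCatRecord: by position with set(int, Object), and by column name with set(String, HCatSchema, Object). It is not taken from any of the listed projects; the two-column schema and the HCatFieldSchema constructor used to build it are assumptions about the HCatalog API and may differ slightly between Hive versions.

import java.util.Arrays;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class DefaultHCatRecordSketch {
    public static void main(String[] args) throws HCatException {
        // Fill by position: size the record up front, then set each slot by index.
        DefaultHCatRecord byPosition = new DefaultHCatRecord(2);
        byPosition.set(0, 42);
        byPosition.set(1, "hello");

        // Fill by column name: a schema is required so the name can be resolved to a position.
        // This two-column schema is a hypothetical stand-in for a real table schema.
        HCatSchema schema = new HCatSchema(Arrays.asList(
                new HCatFieldSchema("id", HCatFieldSchema.Type.INT, null),
                new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, null)));
        DefaultHCatRecord byName = new DefaultHCatRecord(schema.size());
        byName.set("id", schema, 42);
        byName.set("name", schema, "hello");

        System.out.println(byPosition.getAll()); // [42, hello]
        System.out.println(byName.getAll());     // [42, hello]
    }
}

The project examples that follow use these same two setters against real table schemas (for instance, Examples #4 and #18 set fields by name, while Example #9 sets them by position).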
Example #1
Source File: HdfsUtil.java From ES-Fastloader with Apache License 2.0
public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
    job.setJarByClass(FastIndex.class);

    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());

    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));

    return job;
}
Example #2
Source File: FastIndexMapper.java From ES-Fastloader with Apache License 2.0
@Override
protected void map(Object key, HCatRecord value, Context context) throws IOException, InterruptedException {
    DefaultHCatRecord hCatRecord = (DefaultHCatRecord) value;

    int shardNo;
    List<String> keyList = taskConfig.getKeyList();
    if (keyList == null || keyList.size() == 0) {
        shardNo = (int) (Math.random() * templateConfig.getReducerNum());
    } else {
        String keyStr = getKeyValue(keyList, hCatRecord);
        shardNo = CommonUtils.getShardId(keyStr, templateConfig.getReducerNum());
    }

    // The number of shards is the same as the number of reducers.
    context.write(new IntWritable(shardNo), hCatRecord);
}
Example #3
Source File: FastIndexMapper.java From ES-Fastloader with Apache License 2.0
private String getKeyValue(List<String> keys, DefaultHCatRecord hCatRecord) throws HCatException {
    StringBuilder sb = new StringBuilder();
    for (String key : keys) {
        Object id = hCatRecord.get(key, this.schema);
        if (id == null || StringUtils.isBlank(id.toString())) {
            sb.append("");
        } else {
            sb.append(id.toString());
        }
        sb.append("_");
    }

    if (sb.length() > 1) {
        return sb.substring(0, sb.length() - 1);
    } else {
        return sb.toString();
    }
}
Example #4
Source File: HCatalogTestUtils.java From aliyun-maxcompute-data-collectors with Apache License 2.0
private List<HCatRecord> generateHCatRecords(int numRecords,
    HCatSchema hCatTblSchema, ColumnGenerator... extraCols) throws Exception {
    List<HCatRecord> records = new ArrayList<HCatRecord>();
    List<HCatFieldSchema> hCatTblCols = hCatTblSchema.getFields();
    int size = hCatTblCols.size();
    for (int i = 0; i < numRecords; ++i) {
        DefaultHCatRecord record = new DefaultHCatRecord(size);
        record.set(hCatTblCols.get(0).getName(), hCatTblSchema, i);
        record.set(hCatTblCols.get(1).getName(), hCatTblSchema, "textfield" + i);
        int idx = 0;
        for (int j = 0; j < extraCols.length; ++j) {
            if (extraCols[j].getKeyType() == KeyType.STATIC_KEY) {
                continue;
            }
            record.set(hCatTblCols.get(idx + 2).getName(), hCatTblSchema,
                extraCols[j].getHCatValue(i));
            ++idx;
        }
        records.add(record);
    }
    return records;
}
Example #5
Source File: TableDataInserterTest.java From HiveRunner with Apache License 2.0
@Test
public void insertsRowsIntoExistingTable() {
    Multimap<Map<String, String>, HCatRecord> data = ImmutableMultimap
            .<Map<String, String>, HCatRecord>builder()
            .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "aa", "bb")))
            .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "aa2", "bb2")))
            .put(of("local_date", "2015-10-14"), new DefaultHCatRecord(asList((Object) "cc", "dd")))
            .put(of("local_date", "2015-10-15"), new DefaultHCatRecord(asList((Object) "ee", "ff")))
            .build();

    TableDataInserter inserter = new TableDataInserter(TEST_DB, TEST_TABLE, hiveShell.getHiveConf());
    inserter.insert(data);

    List<String> result = hiveShell.executeQuery("select * from testdb.test_table");
    Collections.sort(result);

    assertEquals(4, result.size());

    assertEquals("aa", result.get(0).split("\t")[0]);
    assertEquals("bb", result.get(0).split("\t")[1]);
    assertEquals("2015-10-14", result.get(0).split("\t")[2]);

    assertEquals("aa2", result.get(1).split("\t")[0]);
    assertEquals("bb2", result.get(1).split("\t")[1]);
    assertEquals("2015-10-14", result.get(1).split("\t")[2]);

    assertEquals("cc", result.get(2).split("\t")[0]);
    assertEquals("dd", result.get(2).split("\t")[1]);
    assertEquals("2015-10-14", result.get(2).split("\t")[2]);

    assertEquals("ee", result.get(3).split("\t")[0]);
    assertEquals("ff", result.get(3).split("\t")[1]);
    assertEquals("2015-10-15", result.get(3).split("\t")[2]);
}
Example #6
Source File: HCatInputFormatBase.java From Flink-CEPplus with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
    super();
    this.configuration = config;
    HadoopUtils.mergeHadoopConf(this.configuration);

    this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
    this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

    // configure output schema of HCatFormat
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    // set type information
    this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
Example #7
Source File: TableDataBuilder.java From HiveRunner with Apache License 2.0
TableDataBuilder copyRow() {
    checkState(row != null, "No previous row to copy.");
    HCatRecord copy = new DefaultHCatRecord(new ArrayList<>(row.getAll()));
    flushRow();
    row = copy;
    return this;
}
Example #8
Source File: HCatInputFormatBase.java From flink with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
    super();
    this.configuration = config;
    HadoopUtils.mergeHadoopConf(this.configuration);

    this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
    this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

    // configure output schema of HCatFormat
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    // set type information
    this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
Example #9
Source File: FactDistinctColumnsMapperTest.java From kylin with Apache License 2.0
@Test
public void testMapper() throws IOException {
    Configuration configuration = mapDriver.getConfiguration();
    configuration.set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100");
    configuration.set(BatchConstants.CFG_CUBE_NAME, "test_kylin_cube_with_slr_1_new_segment");
    configuration.set(BatchConstants.CFG_CUBE_SEGMENT_ID, "198va32a-a33e-4b69-83dd-0bb8b1f8c53b");

    HCatRecord value1 = new DefaultHCatRecord(11);
    value1.set(0, "2012-08-16");
    value1.set(1, "48027");
    value1.set(2, "0");
    value1.set(3, "Home & Garden");
    value1.set(4, "Cheese & Crackers");
    value1.set(5, "Cheese & Crackers");
    value1.set(6, "48027");
    value1.set(7, "16");
    value1.set(8, "10000010");
    value1.set(9, "204.28");
    value1.set(10, "5");
    mapDriver.addInput(new LongWritable(0), value1);

    List<Pair<SelfDefineSortableKey, Text>> result = mapDriver.run();

    int colsNeedDictSize = cubeDesc.getAllColumnsNeedDictionaryBuilt().size();
    int cuboidsCnt = cubeDesc.getAllCuboids().size();

    assertEquals(
            colsNeedDictSize + (cubeDesc.getRowkey().getRowKeyColumns().length - colsNeedDictSize) * 2 + cuboidsCnt,
            result.size());
}
Example #10
Source File: HCatInputFormatBase.java From flink with Apache License 2.0
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
    super();
    this.configuration = config;
    HadoopUtils.mergeHadoopConf(this.configuration);

    this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
    this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

    // configure output schema of HCatFormat
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    // set type information
    this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
Example #11
Source File: FastIndexReducer.java From ES-Fastloader with Apache License 2.0
@Override
protected void reduce(IntWritable key, Iterable<DefaultHCatRecord> values, Context context)
        throws IOException, InterruptedException {
    this.reduceId = key.get();
    LogUtils.info("reduce start, es reduceNo is:" + reduceId);

    Iterator<DefaultHCatRecord> records = values.iterator();
    while (records.hasNext()) {
        DefaultHCatRecord record = records.next();
        if (record != null) {
            JSONObject jsonObject = transformer.tranform(record.getAll());

            String primeKey;
            List<String> keyList = taskConfig.getKeyList();
            if (keyList == null || keyList.size() == 0) {
                primeKey = UUID.randomUUID().toString();
            } else {
                primeKey = getKeyValue(keyList, jsonObject);
            }

            esWriter.bulk(primeKey, jsonObject);
        }
    }

    esWriter.finish();
    context.write(NullWritable.get(), NullWritable.get());
    log.info("reduce finish!");
}
Example #12
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRW() throws Exception {
    List<Object> rlist = new ArrayList<Object>(13);
    {
        rlist.add(new Byte("123"));
        rlist.add(new Short("456"));
        rlist.add(new Integer(789));
        rlist.add(new Long(1000L));
        rlist.add(new Double(5.3D));
        rlist.add(new Float(2.39F));
        rlist.add(new String("hcat\nand\nhadoop"));
        rlist.add(null);

        List<Object> innerStruct = new ArrayList<Object>(2);
        innerStruct.add(new String("abc"));
        innerStruct.add(new String("def"));
        rlist.add(innerStruct);

        List<Integer> innerList = new ArrayList<Integer>();
        innerList.add(314);
        innerList.add(007);
        rlist.add(innerList);

        Map<Short, String> map = new HashMap<Short, String>(3);
        map.put(new Short("2"), "hcat is cool");
        map.put(new Short("3"), "is it?");
        map.put(new Short("4"), "or is it not?");
        rlist.add(map);

        rlist.add(new Boolean(true));

        List<Object> c1 = new ArrayList<Object>();
        List<Object> c1_1 = new ArrayList<Object>();
        c1_1.add(new Integer(12));
        List<Object> i2 = new ArrayList<Object>();
        List<Integer> ii1 = new ArrayList<Integer>();
        ii1.add(new Integer(13));
        ii1.add(new Integer(14));
        i2.add(ii1);
        Map<String, List<?>> ii2 = new HashMap<String, List<?>>();
        List<Integer> iii1 = new ArrayList<Integer>();
        iii1.add(new Integer(15));
        ii2.put("phew", iii1);
        i2.add(ii2);
        c1_1.add(i2);
        c1.add(c1_1);
        rlist.add(c1);

        rlist.add(HiveDecimal.create(new BigDecimal("123.45"))); // prec 5, scale 2
        rlist.add(new HiveChar("hive\nchar", 10));
        rlist.add(new HiveVarchar("hive\nvarchar", 20));
        rlist.add(Date.valueOf("2014-01-07"));
        rlist.add(new Timestamp(System.currentTimeMillis()));
        rlist.add("hive\nbinary".getBytes("UTF-8"));
    }

    DefaultHCatRecord r = new DefaultHCatRecord(rlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
            "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(rlist, deserialized);
}
Example #13
Source File: JsonSerdeUtilsTest.java From incubator-hivemall with Apache License 2.0
@Test
public void testRWNull() throws Exception {
    List<Object> nlist = new ArrayList<Object>(13);
    {
        nlist.add(null); // tinyint
        nlist.add(null); // smallint
        nlist.add(null); // int
        nlist.add(null); // bigint
        nlist.add(null); // double
        nlist.add(null); // float
        nlist.add(null); // string
        nlist.add(null); // string
        nlist.add(null); // struct
        nlist.add(null); // array
        nlist.add(null); // map
        nlist.add(null); // bool
        nlist.add(null); // complex
        nlist.add(null); // decimal(5,2)
        nlist.add(null); // char(10)
        nlist.add(null); // varchar(20)
        nlist.add(null); // date
        nlist.add(null); // timestamp
        nlist.add(null); // binary
    }

    DefaultHCatRecord r = new DefaultHCatRecord(nlist);

    List<String> columnNames =
            Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
            "tinyint,smallint,int,bigint,double,float,string,string,"
                + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
                + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>,"
                + "decimal(5,2),char(10),varchar(20),date,timestamp,binary");

    StructTypeInfo rowTypeInfo =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    HCatRecordObjectInspector objInspector =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);

    Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames);
    List<Object> deserialized = JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes);

    assertRecordEquals(nlist, deserialized);
}
Example #14
Source File: HCatalogIOTestUtils.java From beam with Apache License 2.0
/** Returns a DefaultHCatRecord instance for the passed value. */
private static DefaultHCatRecord toHCatRecord(int value) {
    return new DefaultHCatRecord(Arrays.asList("record " + value, value));
}
Example #15
Source File: HCatalogIO.java From beam with Apache License 2.0
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public Coder<HCatRecord> getOutputCoder() {
    return (Coder) WritableCoder.of(DefaultHCatRecord.class);
}
Example #16
Source File: HCatalogIOTest.java From beam with Apache License 2.0
/** Perform end-to-end test of Write-then-Read operation. */
@Test
@NeedsEmptyTestTablesForUnboundedReads
public void testWriteThenUnboundedReadSuccess() throws Exception {
    defaultPipeline
        .apply(Create.of(buildHCatRecords(TEST_RECORDS_COUNT)))
        .apply(
            HCatalogIO.write()
                .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
                .withDatabase(TEST_DATABASE)
                .withTable(TEST_TABLE)
                .withPartition(getPartitions())
                .withBatchSize(512L));
    defaultPipeline.run();

    final ImmutableList<String> partitions = ImmutableList.of("load_date", "product_type");
    final PCollection<HCatRecord> data =
        readAfterWritePipeline
            .apply(
                "ReadData",
                HCatalogIO.read()
                    .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
                    .withDatabase(TEST_DATABASE)
                    .withPartitionCols(partitions)
                    .withTable(TEST_TABLE)
                    .withPollingInterval(Duration.millis(15000))
                    .withTerminationCondition(Watch.Growth.afterTotalOf(Duration.millis(60000))))
            .setCoder((Coder) WritableCoder.of(DefaultHCatRecord.class));

    final PCollection<String> output =
        data.apply(
            ParDo.of(
                new DoFn<HCatRecord, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                        c.output(c.element().get(0).toString());
                    }
                }));

    PAssert.that(output).containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT));
    readAfterWritePipeline.run();
}
Example #17
Source File: HCatalogTestUtils.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public List<HCatRecord> loadHCatTable(String dbName, String tableName,
    Map<String, String> partKeyMap, HCatSchema tblSchema,
    List<HCatRecord> records) throws Exception {

    Job job = new Job(conf, "HCat load job");

    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatWriterMapper.class);

    // Just write 10 lines to the file to drive the mapper
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexInput");

    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);

    int writeCount = records.size();
    recsToLoad.clear();
    recsToLoad.addAll(records);
    createInputFile(path, writeCount);

    // input/output settings
    HCatWriterMapper.setWrittenRecordCount(0);

    FileInputFormat.setInputPaths(job, path);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partKeyMap);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    HCatOutputFormat.setSchema(job, tblSchema);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);
    SqoopHCatUtilities.addJars(job, new SqoopOptions());
    boolean success = job.waitForCompletion(true);

    if (!success) {
        throw new IOException("Loading HCatalog table with test records failed");
    }

    utils.invokeOutputCommitterForLocalMode(job);

    LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");

    return recsToLoad;
}
Example #18
Source File: SqoopHCatImportHelper.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public HCatRecord convertToHCatRecord(SqoopRecord sqr) throws IOException, InterruptedException {
    try {
        // Loading of LOBs was delayed until we have a Context.
        sqr.loadLargeObjects(lobLoader);
    } catch (SQLException sqlE) {
        throw new IOException(sqlE);
    }
    if (colCount == -1) {
        colCount = sqr.getFieldMap().size();
    }
    Map<String, Object> fieldMap = sqr.getFieldMap();
    HCatRecord result = new DefaultHCatRecord(fieldCount);

    for (Map.Entry<String, Object> entry : fieldMap.entrySet()) {
        String key = entry.getKey();
        Object val = entry.getValue();
        String hfn = key.toLowerCase();
        boolean skip = false;
        if (staticPartitionKeys != null && staticPartitionKeys.length > 0) {
            for (int i = 0; i < staticPartitionKeys.length; ++i) {
                if (staticPartitionKeys[i].equals(hfn)) {
                    skip = true;
                    break;
                }
            }
        }
        if (skip) {
            continue;
        }
        HCatFieldSchema hfs = null;
        try {
            hfs = hCatFullTableSchema.get(hfn);
        } catch (Exception e) {
            throw new IOException("Unable to lookup " + hfn + " in the hcat schema");
        }
        if (debugHCatImportMapper) {
            LOG.debug("SqoopRecordVal: field = " + key + " Val " + val + " of type "
                + (val == null ? null : val.getClass().getName()) + ", hcattype " + hfs.getTypeString());
        }
        Object hCatVal = toHCat(val, hfs);
        result.set(hfn, hCatFullTableSchema, hCatVal);
    }

    return result;
}
Example #19
Source File: TableDataBuilder.java From HiveRunner with Apache License 2.0
TableDataBuilder newRow() {
    flushRow();
    row = new DefaultHCatRecord(schema.size());
    return this;
}